author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 18:49:45 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 18:49:45 +0000
commit	2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree	848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/net/ethernet/mellanox/mlx5
parent	Initial commit. (diff)
Adding upstream version 6.1.76. (upstream/6.1.76)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig190
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c250
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c2311
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c228
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c574
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c648
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c918
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c115
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h114
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c279
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h323
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c1154
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h196
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c311
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.c97
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h1246
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h204
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c615
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.c339
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.c722
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c159
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c263
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c215
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c151
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c1240
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c594
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c362
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c877
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h102
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c518
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c351
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c569
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c398
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c900
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c759
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c614
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c606
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.h50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c640
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h69
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c266
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c154
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c337
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c119
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c228
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c380
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c457
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c585
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c177
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c209
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c655
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c2272
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h220
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h218
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c991
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h120
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c1766
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c375
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c128
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c165
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.c203
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c331
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h494
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c691
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h186
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c230
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c311
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c191
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h222
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c404
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c469
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h196
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c606
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c205
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c356
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h169
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c84
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c199
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h145
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c782
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c92
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c921
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c138
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h91
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c1866
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c1390
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c765
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c1256
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dim.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c2453
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c1582
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c1013
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c6021
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c1521
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h275
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c2494
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c371
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c2482
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h496
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c5307
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h389
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c1058
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c274
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c1150
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c178
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c259
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c163
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c305
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c408
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c1853
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c182
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c190
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h120
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c390
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c529
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c943
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c150
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c2075
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h804
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c4040
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c335
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c446
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c235
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h91
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c1001
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c375
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h111
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h214
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c1107
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c3612
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h365
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c762
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c85
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c851
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c546
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c941
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c294
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c842
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h127
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c365
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c226
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c182
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c1580
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h138
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c368
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c128
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c637
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c432
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h90
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c1026
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h121
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c293
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h109
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c810
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c608
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c158
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c152
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c371
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h97
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h108
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c211
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h92
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c316
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c186
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c159
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c196
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c2130
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mcg.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h330
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c796
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c733
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pd.c59
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c1056
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qos.c84
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qos.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rdma.c186
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rdma.h20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rl.c398
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c380
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c571
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h173
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h40
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c364
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h82
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c191
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c354
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c2003
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c824
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c657
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c463
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c171
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c520
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c1108
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c1334
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c1056
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c1390
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h206
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c1960
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c2172
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c319
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h1472
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c820
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h603
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h434
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h192
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c505
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/uar.c328
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c1173
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c261
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h308
331 files changed, 139965 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
new file mode 100644
index 000000000..26685fd0f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -0,0 +1,190 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Mellanox driver configuration
+#
+
+config MLX5_CORE
+ tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
+ depends on PCI
+ select AUXILIARY_BUS
+ select NET_DEVLINK
+ depends on VXLAN || !VXLAN
+ depends on MLXFW || !MLXFW
+ depends on PTP_1588_CLOCK_OPTIONAL
+ depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE
+ help
+ Core driver for low level functionality of the ConnectX-4 and
+ Connect-IB cards by Mellanox Technologies.
+
+config MLX5_FPGA
+ bool "Mellanox Technologies Innova support"
+ depends on MLX5_CORE
+ help
+ Build support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards consist of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
+config MLX5_CORE_EN
+ bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
+ depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
+ select PAGE_POOL
+ select DIMLIB
+ help
+ Ethernet support in Mellanox Technologies ConnectX-4 NIC.
+
+config MLX5_EN_ARFS
+ bool "Mellanox MLX5 ethernet accelerated receive flow steering (ARFS) support"
+ depends on MLX5_CORE_EN && RFS_ACCEL
+ default y
+ help
+ Mellanox MLX5 ethernet hardware-accelerated receive flow steering support.
+ Enables ethernet netdevice ARFS support and ntuple filtering.
+
+config MLX5_EN_RXNFC
+ bool "Mellanox MLX5 ethernet rx nfc flow steering support"
+ depends on MLX5_CORE_EN
+ default y
+ help
+ Mellanox MLX5 ethernet rx nfc flow steering support.
+ Enables ethtool receive network flow classification, which allows user-defined
+ flow rules to direct traffic into an arbitrary rx queue via the ethtool
+ set/get_rxnfc API.
+
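As an aside on the MLX5_EN_RXNFC help text above: the flow rules it mentions are installed from user space through the generic ethtool flow-steering ioctls, not through any mlx5-specific interface. The sketch below is purely illustrative and not part of this patch; the interface name "eth0", the port number and the queue index are invented, and error handling is minimal. It inserts one ntuple rule that steers TCP/IPv4 packets with destination port 4242 to RX queue 3.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_rxnfc nfc = { 0 };
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	nfc.cmd = ETHTOOL_SRXCLSRLINS;			/* insert an rx classification rule */
	nfc.fs.flow_type = TCP_V4_FLOW;
	nfc.fs.h_u.tcp_ip4_spec.pdst = htons(4242);	/* value to match */
	nfc.fs.m_u.tcp_ip4_spec.pdst = 0xffff;		/* mask: the whole port is significant */
	nfc.fs.ring_cookie = 3;				/* deliver matches to RX queue 3 */
	nfc.fs.location = RX_CLS_LOC_ANY;		/* let the driver pick a rule slot */

	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&nfc;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("ETHTOOL_SRXCLSRLINS");

	close(fd);
	return 0;
}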
+config MLX5_MPFS
+ bool "Mellanox Technologies MLX5 MPFS support"
+ depends on MLX5_CORE_EN
+ default y
+ help
+ Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
+ support in ConnectX NIC. MPFS is required when a multi-PF configuration
+ is enabled, to allow passing user-configured unicast MAC addresses to the
+ requesting PF.
+
+config MLX5_ESWITCH
+ bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
+ depends on MLX5_CORE_EN && NET_SWITCHDEV
+ default y
+ help
+ Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
+ E-Switch provides internal SRIOV packet steering and switching for the
+ enabled VFs and PF in two available modes:
+ Legacy SRIOV mode (based on L2 MAC/VLAN steering).
+ Switchdev mode (eswitch offloads).
+
+config MLX5_BRIDGE
+ bool
+ depends on MLX5_ESWITCH && BRIDGE
+ default y
+ help
+ mlx5 ConnectX offloads support for Ethernet Bridging (BRIDGE).
+ Enable adding representors of mlx5 uplink and VF ports to Bridge and
+ offloading rules for traffic between such ports. Supports VLANs (trunk and
+ access modes).
+
+config MLX5_CLS_ACT
+ bool "MLX5 TC classifier action support"
+ depends on MLX5_ESWITCH && NET_CLS_ACT
+ default y
+ help
+ mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT),
+ works in both native NIC mode and Switchdev SRIOV mode.
+ Actions get attached to hardware-offloaded classifiers and are
+ invoked after a successful classification. Actions are used to
+ overwrite the classification result, instantly drop or redirect and/or
+ reformat packets at wire speed without involving the host CPU.
+
+ If set to N, TC offloads in both NIC and switchdev modes will be disabled.
+ If unsure, set to Y
+
+config MLX5_TC_CT
+ bool "MLX5 TC connection tracking offload support"
+ depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
+ default y
+ help
+ Say Y here if you want to support offloading connection tracking rules
+ via tc ct action.
+
+ If unsure, set to Y
+
+config MLX5_TC_SAMPLE
+ bool "MLX5 TC sample offload support"
+ depends on MLX5_CLS_ACT
+ depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m
+ default y
+ help
+ Say Y here if you want to support offloading sample rules via tc
+ sample action.
+ If set to N, you will not be able to configure tc rules with the sample
+ action.
+
+ If unsure, set to Y
+
+config MLX5_CORE_EN_DCB
+ bool "Data Center Bridging (DCB) Support"
+ default y
+ depends on MLX5_CORE_EN && DCB
+ help
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
+ If set to N, you will not be able to configure QoS and rate-limit attributes.
+ This option depends on the kernel's DCB support.
+
+ If unsure, set to Y
+
+config MLX5_CORE_IPOIB
+ bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support"
+ depends on MLX5_CORE_EN
+ help
+ MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_MACSEC
+ bool "Connect-X support for MACSec offload"
+ depends on MLX5_CORE_EN
+ depends on MACSEC
+ default n
+ help
+ Build support for MACsec cryptography-offload acceleration in the NIC.
+
+config MLX5_EN_IPSEC
+ bool "Mellanox Technologies IPsec Connect-X support"
+ depends on MLX5_CORE_EN
+ depends on XFRM_OFFLOAD
+ depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ help
+ Build support for IPsec cryptography-offload acceleration in the NIC.
+
+config MLX5_EN_TLS
+ bool "Mellanox Technologies TLS Connect-X support"
+ depends on TLS_DEVICE
+ depends on TLS=y || MLX5_CORE=m
+ depends on MLX5_CORE_EN
+ help
+ Build support for TLS cryptography-offload acceleration in the NIC.
+
+config MLX5_SW_STEERING
+ bool "Mellanox Technologies software-managed steering"
+ depends on MLX5_CORE_EN && MLX5_ESWITCH
+ select CRC32
+ default y
+ help
+ Build support for software-managed steering in the NIC.
+
+config MLX5_SF
+ bool "Mellanox Technologies subfunction device support using auxiliary device"
+ depends on MLX5_CORE && MLX5_CORE_EN
+ help
+ Build support for subfunction devices in the NIC. A Mellanox subfunction
+ device can support RDMA, netdevice and vdpa devices.
+ It is similar to a SRIOV VF but it doesn't require SRIOV support.
+
+config MLX5_SF_MANAGER
+ bool
+ depends on MLX5_SF && MLX5_ESWITCH
+ default y
+ help
+ Build support for subfunction ports in the NIC. A Mellanox subfunction
+ port is managed through devlink. A subfunction supports RDMA, netdevice
+ and vdpa device. It is similar to a SRIOV VF but it doesn't require
+ SRIOV support.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
new file mode 100644
index 000000000..a22c32aab
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox 5th generation network adapters
+# (ConnectX series) core & netdev driver
+#
+
+subdir-ccflags-y += -I$(src)
+
+obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
+
+#
+# mlx5 core basic
+#
+mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
+ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
+ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
+ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
+ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
+ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
+ fw_reset.o qos.o lib/tout.o lib/aso.o
+
+#
+# Netdev basic
+#
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
+ en/channels.o en_main.o en_common.o en_fs.o en_ethtool.o \
+ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
+ en_selftest.o en/port.o en/monitor_stats.o en/health.o \
+ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
+ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
+ en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \
+ lib/crypto.o
+
+#
+# Netdev extra
+#
+mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
+mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
+ en_rep.o en/rep/bond.o en/mod_hdr.o \
+ en/mapping.o lag/mpesw.o
+mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
+ lib/fs_chains.o en/tc_tun.o \
+ esw/indir_table.o en/tc_tun_encap.o \
+ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
+ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
+ en/tc/post_act.o en/tc/int_port.o en/tc/meter.o \
+ en/tc/post_meter.o
+
+mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \
+ en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \
+ en/tc/act/tun.o en/tc/act/csum.o en/tc/act/pedit.o \
+ en/tc/act/vlan.o en/tc/act/vlan_mangle.o en/tc/act/mpls.o \
+ en/tc/act/mirred.o en/tc/act/mirred_nic.o \
+ en/tc/act/ct.o en/tc/act/sample.o en/tc/act/ptype.o \
+ en/tc/act/redirect_ingress.o en/tc/act/police.o
+
+ifneq ($(CONFIG_MLX5_TC_CT),)
+ mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o
+ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o
+endif
+
+mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
+
+#
+# Core extra
+#
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
+ ecpf.o rdma.o esw/legacy.o \
+ esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
+
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
+ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
+ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+
+mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
+
+mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
+mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
+mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
+
+#
+# Ipoib netdev
+#
+mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o
+
+#
+# Accelerations & FPGA
+#
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
+
+mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \
+ en_accel/macsec_stats.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
+ en_accel/ipsec_stats.o en_accel/ipsec_fs.o \
+ en_accel/ipsec_offload.o
+
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \
+ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \
+ en_accel/ktls_tx.o en_accel/ktls_rx.o
+
+mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
+ steering/dr_matcher.o steering/dr_rule.o \
+ steering/dr_icm_pool.o steering/dr_buddy.o \
+ steering/dr_ste.o steering/dr_send.o \
+ steering/dr_ste_v0.o steering/dr_ste_v1.o \
+ steering/dr_ste_v2.o \
+ steering/dr_cmd.o steering/dr_fw.o \
+ steering/dr_action.o steering/fs_dr.o \
+ steering/dr_dbg.o lib/smfs.o
+#
+# SF device
+#
+mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o
+
+#
+# SF manager
+#
+mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
new file mode 100644
index 000000000..6aca004e8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+
+struct mlx5_db_pgdir {
+ struct list_head list;
+ unsigned long *bitmap;
+ __be32 *db_page;
+ dma_addr_t db_dma;
+};
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0.
+ */
+
+static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+ size_t size, dma_addr_t *dma_handle,
+ int node)
+{
+ struct device *device = mlx5_core_dma_dev(dev);
+ struct mlx5_priv *priv = &dev->priv;
+ int original_node;
+ void *cpu_handle;
+
+ mutex_lock(&priv->alloc_mutex);
+ original_node = dev_to_node(device);
+ set_dev_node(device, node);
+ cpu_handle = dma_alloc_coherent(device, size, dma_handle,
+ GFP_KERNEL);
+ set_dev_node(device, original_node);
+ mutex_unlock(&priv->alloc_mutex);
+ return cpu_handle;
+}
+
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+ struct mlx5_frag_buf *buf, int node)
+{
+ int i;
+
+ buf->size = size;
+ buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
+ buf->page_shift = PAGE_SHIFT;
+ buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
+ GFP_KERNEL);
+ if (!buf->frags)
+ goto err_out;
+
+ for (i = 0; i < buf->npages; i++) {
+ struct mlx5_buf_list *frag = &buf->frags[i];
+ int frag_sz = min_t(int, size, PAGE_SIZE);
+
+ frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
+ &frag->map, node);
+ if (!frag->buf)
+ goto err_free_buf;
+ if (frag->map & ((1 << buf->page_shift) - 1)) {
+ dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz,
+ buf->frags[i].buf, buf->frags[i].map);
+ mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
+ &frag->map, buf->page_shift);
+ goto err_free_buf;
+ }
+ size -= frag_sz;
+ }
+
+ return 0;
+
+err_free_buf:
+ while (i--)
+ dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf,
+ buf->frags[i].map);
+ kfree(buf->frags);
+err_out:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
+
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+ int size = buf->size;
+ int i;
+
+ for (i = 0; i < buf->npages; i++) {
+ int frag_sz = min_t(int, size, PAGE_SIZE);
+
+ dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf,
+ buf->frags[i].map);
+ size -= frag_sz;
+ }
+ kfree(buf->frags);
+}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
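As a usage note for the two helpers above, here is a hypothetical caller (function name and error handling invented for illustration): it allocates a fragmented buffer near a chosen NUMA node, publishes the per-fragment DMA addresses through mlx5_fill_page_frag_array() (defined further down in this file), and then releases everything. Real callers such as the CQ/EQ setup code follow essentially this pattern.

/* Hypothetical caller, for illustration only. */
static int example_create_queue_buf(struct mlx5_core_dev *dev, int nbytes)
{
	struct mlx5_frag_buf buf;
	__be64 *pas;
	int err;

	err = mlx5_frag_buf_alloc_node(dev, nbytes, &buf, dev->priv.numa_node);
	if (err)
		return err;

	pas = kcalloc(buf.npages, sizeof(*pas), GFP_KERNEL);
	if (!pas) {
		mlx5_frag_buf_free(dev, &buf);
		return -ENOMEM;
	}

	/* One big-endian DMA address per PAGE_SIZE fragment; this array is
	 * what a CREATE_CQ/CREATE_EQ style command input expects. */
	mlx5_fill_page_frag_array(&buf, pas);

	/* ... hand 'pas' to the firmware command that creates the queue ... */

	kfree(pas);
	mlx5_frag_buf_free(dev, &buf);
	return 0;
}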
+
+static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
+ int node)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+ struct mlx5_db_pgdir *pgdir;
+
+ pgdir = kzalloc_node(sizeof(*pgdir), GFP_KERNEL, node);
+ if (!pgdir)
+ return NULL;
+
+ pgdir->bitmap = bitmap_zalloc_node(db_per_page, GFP_KERNEL, node);
+ if (!pgdir->bitmap) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ bitmap_fill(pgdir->bitmap, db_per_page);
+
+ pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
+ &pgdir->db_dma, node);
+ if (!pgdir->db_page) {
+ bitmap_free(pgdir->bitmap);
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir,
+ struct mlx5_db *db)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+ int offset;
+ int i;
+
+ i = find_first_bit(pgdir->bitmap, db_per_page);
+ if (i >= db_per_page)
+ return -ENOMEM;
+
+ __clear_bit(i, pgdir->bitmap);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ offset = db->index * cache_line_size();
+ db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page);
+ db->dma = pgdir->db_dma + offset;
+
+ db->db[0] = 0;
+ db->db[1] = 0;
+
+ return 0;
+}
+
+int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node)
+{
+ struct mlx5_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&dev->priv.pgdir_mutex);
+
+ list_for_each_entry(pgdir, &dev->priv.pgdir_list, list)
+ if (!mlx5_alloc_db_from_pgdir(pgdir, db))
+ goto out;
+
+ pgdir = mlx5_alloc_db_pgdir(dev, node);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &dev->priv.pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db));
+
+out:
+ mutex_unlock(&dev->priv.pgdir_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
+
+void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+
+ mutex_lock(&dev->priv.pgdir_mutex);
+
+ __set_bit(db->index, db->u.pgdir->bitmap);
+
+ if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) {
+ dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE,
+ db->u.pgdir->db_page, db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ bitmap_free(db->u.pgdir->bitmap);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&dev->priv.pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx5_db_free);
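To make the arithmetic in the doorbell code above concrete: with a 4096-byte page and 64-byte cache lines, each pgdir page holds PAGE_SIZE / cache_line_size() = 64 doorbell records; every record is a pair of __be32 words placed at the start of its own cache line, and the bitmap tracks which slots are free. A hypothetical caller (illustration only) looks like this:

/* Hypothetical caller, for illustration only. */
static int example_use_doorbell(struct mlx5_core_dev *dev, int node)
{
	struct mlx5_db db;
	int err;

	err = mlx5_db_alloc_node(dev, &db, node);
	if (err)
		return err;

	/* db.db points at two __be32 words inside a shared DMA-coherent page
	 * (already cleared by the allocator); db.dma is the bus address that
	 * is passed to the HCA when the queue object is created. */

	mlx5_db_free(dev, &db);
	return 0;
}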
+
+void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm)
+{
+ int i;
+
+ WARN_ON(perm & 0xfc);
+ for (i = 0; i < buf->npages; i++)
+ pas[i] = cpu_to_be64(buf->frags[i].map | perm);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+ mlx5_fill_page_frag_array_perm(buf, pas, 0);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
new file mode 100644
index 000000000..ac6a0785b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -0,0 +1,2311 @@
+/*
+ * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/debugfs.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/tout.h"
+#define CREATE_TRACE_POINTS
+#include "diag/cmd_tracepoint.h"
+
+struct mlx5_ifc_mbox_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+enum {
+ CMD_IF_REV = 5,
+};
+
+enum {
+ CMD_MODE_POLLING,
+ CMD_MODE_EVENTS
+};
+
+enum {
+ MLX5_CMD_DELIVERY_STAT_OK = 0x0,
+ MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1,
+ MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2,
+ MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3,
+ MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4,
+ MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5,
+ MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6,
+ MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7,
+ MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8,
+ MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9,
+ MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
+};
+
+static u16 in_to_opcode(void *in)
+{
+ return MLX5_GET(mbox_in, in, opcode);
+}
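Given the mbox_in layout defined just above (a 16-bit opcode at bit offset 0), MLX5_GET() here amounts to reading the upper half of the first big-endian dword of any command input. A hand-rolled equivalent, shown only to make the layout concrete, would be:

/* Illustrative equivalent of MLX5_GET(mbox_in, in, opcode). */
static u16 in_to_opcode_by_hand(void *in)
{
	return be32_to_cpu(*(__be32 *)in) >> 16;
}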
+
+/* Returns true for opcodes that might be triggered very frequently and throttle
+ * the command interface. Limit their use of command slots.
+ */
+static bool mlx5_cmd_is_throttle_opcode(u16 op)
+{
+ switch (op) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ return true;
+ }
+ return false;
+}
+
+static struct mlx5_cmd_work_ent *
+cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t cbk, void *context, int page_queue)
+{
+ gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
+ struct mlx5_cmd_work_ent *ent;
+
+ ent = kzalloc(sizeof(*ent), alloc_flags);
+ if (!ent)
+ return ERR_PTR(-ENOMEM);
+
+ ent->idx = -EINVAL;
+ ent->in = in;
+ ent->out = out;
+ ent->uout = uout;
+ ent->uout_size = uout_size;
+ ent->callback = cbk;
+ ent->context = context;
+ ent->cmd = cmd;
+ ent->page_queue = page_queue;
+ ent->op = in_to_opcode(in->first.data);
+ refcount_set(&ent->refcnt, 1);
+
+ return ent;
+}
+
+static void cmd_free_ent(struct mlx5_cmd_work_ent *ent)
+{
+ kfree(ent);
+}
+
+static u8 alloc_token(struct mlx5_cmd *cmd)
+{
+ u8 token;
+
+ spin_lock(&cmd->token_lock);
+ cmd->token++;
+ if (cmd->token == 0)
+ cmd->token++;
+ token = cmd->token;
+ spin_unlock(&cmd->token_lock);
+
+ return token;
+}
+
+static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds);
+ if (ret < cmd->vars.max_reg_cmds) {
+ clear_bit(ret, &cmd->vars.bitmask);
+ ent->idx = ret;
+ cmd->ent_arr[ent->idx] = ent;
+ }
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+
+ return ret < cmd->vars.max_reg_cmds ? ret : -ENOMEM;
+}
+
+static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
+{
+ lockdep_assert_held(&cmd->alloc_lock);
+ set_bit(idx, &cmd->vars.bitmask);
+}
+
+static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
+{
+ refcount_inc(&ent->refcnt);
+}
+
+static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
+{
+ struct mlx5_cmd *cmd = ent->cmd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ if (!refcount_dec_and_test(&ent->refcnt))
+ goto out;
+
+ if (ent->idx >= 0) {
+ cmd_free_index(cmd, ent->idx);
+ up(ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem);
+ }
+
+ cmd_free_ent(ent);
+out:
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+}
+
+static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
+{
+ return cmd->cmd_buf + (idx << cmd->vars.log_stride);
+}
+
+static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg)
+{
+ int size = msg->len;
+ int blen = size - min_t(int, sizeof(msg->first.data), size);
+
+ return DIV_ROUND_UP(blen, MLX5_CMD_DATA_BLOCK_SIZE);
+}
+
+static u8 xor8_buf(void *buf, size_t offset, int len)
+{
+ u8 *ptr = buf;
+ u8 sum = 0;
+ int i;
+ int end = len + offset;
+
+ for (i = offset; i < end; i++)
+ sum ^= ptr[i];
+
+ return sum;
+}
+
+static int verify_block_sig(struct mlx5_cmd_prot_block *block)
+{
+ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+ int xor_len = sizeof(*block) - sizeof(block->data) - 1;
+
+ if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
+ return -EHWPOISON;
+
+ if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
+ return -EHWPOISON;
+
+ return 0;
+}
+
+static void calc_block_sig(struct mlx5_cmd_prot_block *block)
+{
+ int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
+ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+
+ block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
+ block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
+}
+
+static void calc_chain_sig(struct mlx5_cmd_msg *msg)
+{
+ struct mlx5_cmd_mailbox *next = msg->next;
+ int n = mlx5_calc_cmd_blocks(msg);
+ int i = 0;
+
+ for (i = 0; i < n && next; i++) {
+ calc_block_sig(next->buf);
+ next = next->next;
+ }
+}
+
+static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
+{
+ ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+ if (csum) {
+ calc_chain_sig(ent->in);
+ calc_chain_sig(ent->out);
+ }
+}
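The signature helpers above implement a one-byte XOR checksum: calc_block_sig() stores the complement of the XOR over the covered bytes, so XOR-ing the same region with the signature byte included always yields 0xff, which is exactly what verify_block_sig() and verify_signature() test for. A small self-contained sketch of that invariant (kernel-style types, illustration only):

/* sig = ~xor(payload)  =>  xor(payload, sig) == 0xff, always. */
static u8 xor8_demo(const u8 *p, int len)
{
	u8 sum = 0;

	while (len--)
		sum ^= *p++;
	return sum;
}

static bool xor_sig_invariant_holds(void)
{
	u8 blk[8] = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0x00 };

	blk[7] = ~xor8_demo(blk, 7);		/* what calc_block_sig() does */
	return xor8_demo(blk, 8) == 0xff;	/* what verification relies on */
}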
+
+static void poll_timeout(struct mlx5_cmd_work_ent *ent)
+{
+ struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd);
+ u64 cmd_to_ms = mlx5_tout_ms(dev, CMD);
+ unsigned long poll_end;
+ u8 own;
+
+ poll_end = jiffies + msecs_to_jiffies(cmd_to_ms + 1000);
+
+ do {
+ own = READ_ONCE(ent->lay->status_own);
+ if (!(own & CMD_OWNER_HW)) {
+ ent->ret = 0;
+ return;
+ }
+ cond_resched();
+ } while (time_before(jiffies, poll_end));
+
+ ent->ret = -ETIMEDOUT;
+}
+
+static int verify_signature(struct mlx5_cmd_work_ent *ent)
+{
+ struct mlx5_cmd_mailbox *next = ent->out->next;
+ int n = mlx5_calc_cmd_blocks(ent->out);
+ int err;
+ u8 sig;
+ int i = 0;
+
+ sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+ if (sig != 0xff)
+ return -EHWPOISON;
+
+ for (i = 0; i < n && next; i++) {
+ err = verify_block_sig(next->buf);
+ if (err)
+ return -EHWPOISON;
+
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static void dump_buf(void *buf, int size, int data_only, int offset, int idx)
+{
+ __be32 *p = buf;
+ int i;
+
+ for (i = 0; i < size; i += 16) {
+ pr_debug("cmd[%d]: %03x: %08x %08x %08x %08x\n", idx, offset,
+ be32_to_cpu(p[0]), be32_to_cpu(p[1]),
+ be32_to_cpu(p[2]), be32_to_cpu(p[3]));
+ p += 4;
+ offset += 16;
+ }
+ if (!data_only)
+ pr_debug("\n");
+}
+
+static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+ u32 *synd, u8 *status)
+{
+ *synd = 0;
+ *status = 0;
+
+ switch (op) {
+ case MLX5_CMD_OP_TEARDOWN_HCA:
+ case MLX5_CMD_OP_DISABLE_HCA:
+ case MLX5_CMD_OP_MANAGE_PAGES:
+ case MLX5_CMD_OP_DESTROY_MKEY:
+ case MLX5_CMD_OP_DESTROY_EQ:
+ case MLX5_CMD_OP_DESTROY_CQ:
+ case MLX5_CMD_OP_DESTROY_QP:
+ case MLX5_CMD_OP_DESTROY_PSV:
+ case MLX5_CMD_OP_DESTROY_SRQ:
+ case MLX5_CMD_OP_DESTROY_XRC_SRQ:
+ case MLX5_CMD_OP_DESTROY_XRQ:
+ case MLX5_CMD_OP_DESTROY_DCT:
+ case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
+ case MLX5_CMD_OP_DEALLOC_PD:
+ case MLX5_CMD_OP_DEALLOC_UAR:
+ case MLX5_CMD_OP_DETACH_FROM_MCG:
+ case MLX5_CMD_OP_DEALLOC_XRCD:
+ case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_DESTROY_LAG:
+ case MLX5_CMD_OP_DESTROY_VPORT_LAG:
+ case MLX5_CMD_OP_DESTROY_TIR:
+ case MLX5_CMD_OP_DESTROY_SQ:
+ case MLX5_CMD_OP_DESTROY_RQ:
+ case MLX5_CMD_OP_DESTROY_RMP:
+ case MLX5_CMD_OP_DESTROY_TIS:
+ case MLX5_CMD_OP_DESTROY_RQT:
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
+ case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_2ERR_QP:
+ case MLX5_CMD_OP_2RST_QP:
+ case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
+ case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT:
+ case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_DESTROY_QP:
+ case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_DEALLOC_MEMIC:
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+ case MLX5_CMD_OP_DEALLOC_SF:
+ case MLX5_CMD_OP_DESTROY_UCTX:
+ case MLX5_CMD_OP_DESTROY_UMEM:
+ case MLX5_CMD_OP_MODIFY_RQT:
+ return MLX5_CMD_STAT_OK;
+
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_INIT_HCA:
+ case MLX5_CMD_OP_ENABLE_HCA:
+ case MLX5_CMD_OP_QUERY_PAGES:
+ case MLX5_CMD_OP_SET_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_SET_ISSI:
+ case MLX5_CMD_OP_CREATE_MKEY:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+ case MLX5_CMD_OP_CREATE_EQ:
+ case MLX5_CMD_OP_QUERY_EQ:
+ case MLX5_CMD_OP_GEN_EQE:
+ case MLX5_CMD_OP_CREATE_CQ:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_MODIFY_CQ:
+ case MLX5_CMD_OP_CREATE_QP:
+ case MLX5_CMD_OP_RST2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_SQD_RTS_QP:
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_CREATE_PSV:
+ case MLX5_CMD_OP_CREATE_SRQ:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_ARM_RQ:
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ case MLX5_CMD_OP_CREATE_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_CREATE_DCT:
+ case MLX5_CMD_OP_DRAIN_DCT:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_MODIFY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OP_SET_ROCE_ADDRESS:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_SET_MONITOR_COUNTER:
+ case MLX5_CMD_OP_ARM_MONITOR_COUNTER:
+ case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
+ case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
+ case MLX5_CMD_OP_ALLOC_PD:
+ case MLX5_CMD_OP_ALLOC_UAR:
+ case MLX5_CMD_OP_CONFIG_INT_MODERATION:
+ case MLX5_CMD_OP_ACCESS_REG:
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+ case MLX5_CMD_OP_MAD_IFC:
+ case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+ case MLX5_CMD_OP_SET_MAD_DEMUX:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_MODIFY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_MODIFY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_CREATE_LAG:
+ case MLX5_CMD_OP_MODIFY_LAG:
+ case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_CREATE_VPORT_LAG:
+ case MLX5_CMD_OP_CREATE_TIR:
+ case MLX5_CMD_OP_MODIFY_TIR:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_CREATE_SQ:
+ case MLX5_CMD_OP_MODIFY_SQ:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_MODIFY_RQ:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_CREATE_RMP:
+ case MLX5_CMD_OP_MODIFY_RMP:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_CREATE_TIS:
+ case MLX5_CMD_OP_MODIFY_TIS:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_CREATE_RQT:
+ case MLX5_CMD_OP_QUERY_RQT:
+
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_CREATE_QP:
+ case MLX5_CMD_OP_FPGA_MODIFY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_CREATE_UCTX:
+ case MLX5_CMD_OP_CREATE_UMEM:
+ case MLX5_CMD_OP_ALLOC_MEMIC:
+ case MLX5_CMD_OP_MODIFY_XRQ:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_QUERY_VHCA_STATE:
+ case MLX5_CMD_OP_MODIFY_VHCA_STATE:
+ case MLX5_CMD_OP_ALLOC_SF:
+ case MLX5_CMD_OP_SUSPEND_VHCA:
+ case MLX5_CMD_OP_RESUME_VHCA:
+ case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+ case MLX5_CMD_OP_SAVE_VHCA_STATE:
+ case MLX5_CMD_OP_LOAD_VHCA_STATE:
+ *status = MLX5_DRIVER_STATUS_ABORTED;
+ *synd = MLX5_DRIVER_SYND;
+ return -ENOLINK;
+ default:
+ mlx5_core_err(dev, "Unknown FW command (%d)\n", op);
+ return -EINVAL;
+ }
+}
+
+const char *mlx5_command_str(int command)
+{
+#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd
+
+ switch (command) {
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP);
+ MLX5_COMMAND_STR_CASE(QUERY_ADAPTER);
+ MLX5_COMMAND_STR_CASE(INIT_HCA);
+ MLX5_COMMAND_STR_CASE(TEARDOWN_HCA);
+ MLX5_COMMAND_STR_CASE(ENABLE_HCA);
+ MLX5_COMMAND_STR_CASE(DISABLE_HCA);
+ MLX5_COMMAND_STR_CASE(QUERY_PAGES);
+ MLX5_COMMAND_STR_CASE(MANAGE_PAGES);
+ MLX5_COMMAND_STR_CASE(SET_HCA_CAP);
+ MLX5_COMMAND_STR_CASE(QUERY_ISSI);
+ MLX5_COMMAND_STR_CASE(SET_ISSI);
+ MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION);
+ MLX5_COMMAND_STR_CASE(CREATE_MKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_MKEY);
+ MLX5_COMMAND_STR_CASE(DESTROY_MKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS);
+ MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME);
+ MLX5_COMMAND_STR_CASE(CREATE_EQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_EQ);
+ MLX5_COMMAND_STR_CASE(QUERY_EQ);
+ MLX5_COMMAND_STR_CASE(GEN_EQE);
+ MLX5_COMMAND_STR_CASE(CREATE_CQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_CQ);
+ MLX5_COMMAND_STR_CASE(QUERY_CQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_CQ);
+ MLX5_COMMAND_STR_CASE(CREATE_QP);
+ MLX5_COMMAND_STR_CASE(DESTROY_QP);
+ MLX5_COMMAND_STR_CASE(RST2INIT_QP);
+ MLX5_COMMAND_STR_CASE(INIT2RTR_QP);
+ MLX5_COMMAND_STR_CASE(RTR2RTS_QP);
+ MLX5_COMMAND_STR_CASE(RTS2RTS_QP);
+ MLX5_COMMAND_STR_CASE(SQERR2RTS_QP);
+ MLX5_COMMAND_STR_CASE(2ERR_QP);
+ MLX5_COMMAND_STR_CASE(2RST_QP);
+ MLX5_COMMAND_STR_CASE(QUERY_QP);
+ MLX5_COMMAND_STR_CASE(SQD_RTS_QP);
+ MLX5_COMMAND_STR_CASE(INIT2INIT_QP);
+ MLX5_COMMAND_STR_CASE(CREATE_PSV);
+ MLX5_COMMAND_STR_CASE(DESTROY_PSV);
+ MLX5_COMMAND_STR_CASE(CREATE_SRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_SRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_SRQ);
+ MLX5_COMMAND_STR_CASE(ARM_RQ);
+ MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(CREATE_DCT);
+ MLX5_COMMAND_STR_CASE(DESTROY_DCT);
+ MLX5_COMMAND_STR_CASE(DRAIN_DCT);
+ MLX5_COMMAND_STR_CASE(QUERY_DCT);
+ MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION);
+ MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE);
+ MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE);
+ MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS);
+ MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV);
+ MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER);
+ MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER);
+ MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER);
+ MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
+ MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
+ MLX5_COMMAND_STR_CASE(ALLOC_PD);
+ MLX5_COMMAND_STR_CASE(DEALLOC_PD);
+ MLX5_COMMAND_STR_CASE(ALLOC_UAR);
+ MLX5_COMMAND_STR_CASE(DEALLOC_UAR);
+ MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
+ MLX5_COMMAND_STR_CASE(ACCESS_REG);
+ MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
+ MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG);
+ MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
+ MLX5_COMMAND_STR_CASE(MAD_IFC);
+ MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
+ MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX);
+ MLX5_COMMAND_STR_CASE(NOP);
+ MLX5_COMMAND_STR_CASE(ALLOC_XRCD);
+ MLX5_COMMAND_STR_CASE(DEALLOC_XRCD);
+ MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN);
+ MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS);
+ MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS);
+ MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS);
+ MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT);
+ MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT);
+ MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(SET_WOL_ROL);
+ MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL);
+ MLX5_COMMAND_STR_CASE(CREATE_LAG);
+ MLX5_COMMAND_STR_CASE(MODIFY_LAG);
+ MLX5_COMMAND_STR_CASE(QUERY_LAG);
+ MLX5_COMMAND_STR_CASE(DESTROY_LAG);
+ MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG);
+ MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG);
+ MLX5_COMMAND_STR_CASE(CREATE_TIR);
+ MLX5_COMMAND_STR_CASE(MODIFY_TIR);
+ MLX5_COMMAND_STR_CASE(DESTROY_TIR);
+ MLX5_COMMAND_STR_CASE(QUERY_TIR);
+ MLX5_COMMAND_STR_CASE(CREATE_SQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_SQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_SQ);
+ MLX5_COMMAND_STR_CASE(QUERY_SQ);
+ MLX5_COMMAND_STR_CASE(CREATE_RQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_RQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_RQ);
+ MLX5_COMMAND_STR_CASE(QUERY_RQ);
+ MLX5_COMMAND_STR_CASE(CREATE_RMP);
+ MLX5_COMMAND_STR_CASE(MODIFY_RMP);
+ MLX5_COMMAND_STR_CASE(DESTROY_RMP);
+ MLX5_COMMAND_STR_CASE(QUERY_RMP);
+ MLX5_COMMAND_STR_CASE(CREATE_TIS);
+ MLX5_COMMAND_STR_CASE(MODIFY_TIS);
+ MLX5_COMMAND_STR_CASE(DESTROY_TIS);
+ MLX5_COMMAND_STR_CASE(QUERY_TIS);
+ MLX5_COMMAND_STR_CASE(CREATE_RQT);
+ MLX5_COMMAND_STR_CASE(MODIFY_RQT);
+ MLX5_COMMAND_STR_CASE(DESTROY_RQT);
+ MLX5_COMMAND_STR_CASE(QUERY_RQT);
+ MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT);
+ MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+ MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
+ MLX5_COMMAND_STR_CASE(CREATE_XRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_XRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_XRQ);
+ MLX5_COMMAND_STR_CASE(ARM_XRQ);
+ MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
+ MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
+ MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS);
+ MLX5_COMMAND_STR_CASE(CREATE_UCTX);
+ MLX5_COMMAND_STR_CASE(DESTROY_UCTX);
+ MLX5_COMMAND_STR_CASE(CREATE_UMEM);
+ MLX5_COMMAND_STR_CASE(DESTROY_UMEM);
+ MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR);
+ MLX5_COMMAND_STR_CASE(MODIFY_XRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(ALLOC_SF);
+ MLX5_COMMAND_STR_CASE(DEALLOC_SF);
+ MLX5_COMMAND_STR_CASE(SUSPEND_VHCA);
+ MLX5_COMMAND_STR_CASE(RESUME_VHCA);
+ MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE);
+ MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE);
+ default: return "unknown command opcode";
+ }
+}
+
+static const char *cmd_status_str(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_STAT_OK:
+ return "OK";
+ case MLX5_CMD_STAT_INT_ERR:
+ return "internal error";
+ case MLX5_CMD_STAT_BAD_OP_ERR:
+ return "bad operation";
+ case MLX5_CMD_STAT_BAD_PARAM_ERR:
+ return "bad parameter";
+ case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+ return "bad system state";
+ case MLX5_CMD_STAT_BAD_RES_ERR:
+ return "bad resource";
+ case MLX5_CMD_STAT_RES_BUSY:
+ return "resource busy";
+ case MLX5_CMD_STAT_LIM_ERR:
+ return "limits exceeded";
+ case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+ return "bad resource state";
+ case MLX5_CMD_STAT_IX_ERR:
+ return "bad index";
+ case MLX5_CMD_STAT_NO_RES_ERR:
+ return "no resources";
+ case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+ return "bad input length";
+ case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+ return "bad output length";
+ case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+ return "bad QP state";
+ case MLX5_CMD_STAT_BAD_PKT_ERR:
+ return "bad packet (discarded)";
+ case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+ return "bad size too many outstanding CQEs";
+ default:
+ return "unknown status";
+ }
+}
+
+static int cmd_status_to_err(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_STAT_OK: return 0;
+ case MLX5_CMD_STAT_INT_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_RES_BUSY: return -EBUSY;
+ case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM;
+ case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_IX_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN;
+ case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL;
+ default: return -EIO;
+ }
+}
+
+void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out)
+{
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
+
+ mlx5_core_err_rl(dev,
+ "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
+ mlx5_command_str(opcode), opcode, op_mod,
+ cmd_status_str(status), status, syndrome, cmd_status_to_err(status));
+}
+EXPORT_SYMBOL(mlx5_cmd_out_err);
+
+static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out)
+{
+ u16 opcode, op_mod;
+ u16 uid;
+
+ opcode = in_to_opcode(in);
+ op_mod = MLX5_GET(mbox_in, in, op_mod);
+ uid = MLX5_GET(mbox_in, in, uid);
+
+ if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
+ mlx5_cmd_out_err(dev, opcode, op_mod, out);
+}
+
+int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out)
+{
+ /* aborted due to a PCI error or by the reset flow via mlx5_cmd_trigger_completions() */
+ if (err == -ENXIO) {
+ u16 opcode = in_to_opcode(in);
+ u32 syndrome;
+ u8 status;
+
+ /* PCI Error, emulate command return status, for smooth reset */
+ err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status);
+ MLX5_SET(mbox_out, out, status, status);
+ MLX5_SET(mbox_out, out, syndrome, syndrome);
+ if (!err)
+ return 0;
+ }
+
+ /* driver or FW delivery error */
+ if (err != -EREMOTEIO && err)
+ return err;
+
+ /* check outbox status */
+ err = cmd_status_to_err(MLX5_GET(mbox_out, out, status));
+ if (err)
+ cmd_status_print(dev, in, out);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_check);
+
+static void dump_command(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_work_ent *ent, int input)
+{
+ struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
+ struct mlx5_cmd_mailbox *next = msg->next;
+ int n = mlx5_calc_cmd_blocks(msg);
+ u16 op = ent->op;
+ int data_only;
+ u32 offset = 0;
+ int dump_len;
+ int i;
+
+ mlx5_core_dbg(dev, "cmd[%d]: start dump\n", ent->idx);
+ data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
+
+ if (data_only)
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
+ "cmd[%d]: dump command data %s(0x%x) %s\n",
+ ent->idx, mlx5_command_str(op), op,
+ input ? "INPUT" : "OUTPUT");
+ else
+ mlx5_core_dbg(dev, "cmd[%d]: dump command %s(0x%x) %s\n",
+ ent->idx, mlx5_command_str(op), op,
+ input ? "INPUT" : "OUTPUT");
+
+ if (data_only) {
+ if (input) {
+ dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset, ent->idx);
+ offset += sizeof(ent->lay->in);
+ } else {
+ dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset, ent->idx);
+ offset += sizeof(ent->lay->out);
+ }
+ } else {
+ dump_buf(ent->lay, sizeof(*ent->lay), 0, offset, ent->idx);
+ offset += sizeof(*ent->lay);
+ }
+
+ for (i = 0; i < n && next; i++) {
+ if (data_only) {
+ dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
+ dump_buf(next->buf, dump_len, 1, offset, ent->idx);
+ offset += MLX5_CMD_DATA_BLOCK_SIZE;
+ } else {
+ mlx5_core_dbg(dev, "cmd[%d]: command block:\n", ent->idx);
+ dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset,
+ ent->idx);
+ offset += sizeof(struct mlx5_cmd_prot_block);
+ }
+ next = next->next;
+ }
+
+ if (data_only)
+ pr_debug("\n");
+
+ mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx);
+}
+
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+ struct mlx5_cmd_work_ent *ent = container_of(dwork,
+ struct mlx5_cmd_work_ent,
+ cb_timeout_work);
+ struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+ cmd);
+
+ mlx5_cmd_eq_recover(dev);
+
+ /* Maybe it was already handled by EQ recovery? */
+ if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx,
+ mlx5_command_str(ent->op), ent->op);
+ goto out; /* phew, already handled */
+ }
+
+ ent->ret = -ETIMEDOUT;
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
+ ent->idx, mlx5_command_str(ent->op), ent->op);
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
+
+out:
+ cmd_ent_put(ent); /* for the cmd_ent_get() taken when scheduling the delayed work */
+}
+
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_msg *msg);
+
+static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
+{
+ if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
+ return true;
+
+ return cmd->allowed_opcode == opcode;
+}
+
+bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
+{
+ return pci_channel_offline(dev->pdev) ||
+ dev->cmd.state != MLX5_CMDIF_STATE_UP ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
+}
+
+static void cmd_work_handler(struct work_struct *work)
+{
+ struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+ struct mlx5_cmd *cmd = ent->cmd;
+ bool poll_cmd = ent->polling;
+ struct mlx5_cmd_layout *lay;
+ struct mlx5_core_dev *dev;
+ unsigned long cb_timeout;
+ struct semaphore *sem;
+ unsigned long flags;
+ int alloc_ret;
+ int cmd_mode;
+
+ dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
+
+ complete(&ent->handling);
+ sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem;
+ down(sem);
+ if (!ent->page_queue) {
+ alloc_ret = cmd_alloc_index(cmd, ent);
+ if (alloc_ret < 0) {
+ mlx5_core_err_rl(dev, "failed to allocate command entry\n");
+ if (ent->callback) {
+ ent->callback(-EAGAIN, ent->context);
+ mlx5_free_cmd_msg(dev, ent->out);
+ free_msg(dev, ent->in);
+ cmd_ent_put(ent);
+ } else {
+ ent->ret = -EAGAIN;
+ complete(&ent->done);
+ }
+ up(sem);
+ return;
+ }
+ } else {
+ ent->idx = cmd->vars.max_reg_cmds;
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ clear_bit(ent->idx, &cmd->vars.bitmask);
+ cmd->ent_arr[ent->idx] = ent;
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+ }
+
+ lay = get_inst(cmd, ent->idx);
+ ent->lay = lay;
+ memset(lay, 0, sizeof(*lay));
+ memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+ if (ent->in->next)
+ lay->in_ptr = cpu_to_be64(ent->in->next->dma);
+ lay->inlen = cpu_to_be32(ent->in->len);
+ if (ent->out->next)
+ lay->out_ptr = cpu_to_be64(ent->out->next->dma);
+ lay->outlen = cpu_to_be32(ent->out->len);
+ lay->type = MLX5_PCI_CMD_XPORT;
+ lay->token = ent->token;
+ lay->status_own = CMD_OWNER_HW;
+ set_signature(ent, !cmd->checksum_disabled);
+ dump_command(dev, ent, 1);
+ ent->ts1 = ktime_get_ns();
+ cmd_mode = cmd->mode;
+
+ if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
+ cmd_ent_get(ent);
+ set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
+
+ cmd_ent_get(ent); /* for the _real_ FW event on completion */
+ /* Skip sending command to fw if internal error */
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
+ ent->ret = -ENXIO;
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
+ return;
+ }
+
+ /* ring doorbell after the descriptor is valid */
+ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
+ wmb();
+ iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
+ /* if not in polling don't use ent after this point */
+ if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
+ poll_timeout(ent);
+ /* make sure we read the descriptor after ownership is SW */
+ rmb();
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT));
+ }
+}
+
+static int deliv_status_to_err(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_DELIVERY_STAT_OK:
+ case MLX5_DRIVER_STATUS_ABORTED:
+ return 0;
+ case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+ case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+ return -EBADR;
+ case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+ case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+ case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+ return -EFAULT; /* Bad address */
+ case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+ case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+ case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+ case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+ return -ENOMSG;
+ case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+ return -EIO;
+ default:
+ return -EINVAL;
+ }
+}
+
+static const char *deliv_status_to_str(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_DELIVERY_STAT_OK:
+ return "no errors";
+ case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+ return "signature error";
+ case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+ return "token error";
+ case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+ return "bad block number";
+ case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+ return "output pointer not aligned to block size";
+ case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+ return "input pointer not aligned to block size";
+ case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+ return "firmware internal error";
+ case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+ return "command input length error";
+ case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+ return "command output length error";
+ case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+ return "reserved fields not cleared";
+ case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+ return "bad command descriptor type";
+ default:
+ return "unknown status code";
+ }
+}
+
+enum {
+ MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000,
+};
+
+static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);
+
+ mlx5_cmd_eq_recover(dev);
+
+ /* Re-wait on the ent->done after executing the recovery flow. If the
+ * recovery flow (or any other recovery flow running simultaneously)
+ * has recovered an EQE, it should cause the entry to be completed by
+ * the command interface.
+ */
+ if (wait_for_completion_timeout(&ent->done, timeout)) {
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx,
+ mlx5_command_str(ent->op), ent->op);
+ return;
+ }
+
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
+ mlx5_command_str(ent->op), ent->op);
+
+ ent->ret = -ETIMEDOUT;
+ mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
+}
+
+static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int err;
+
+ if (!wait_for_completion_timeout(&ent->handling, timeout) &&
+ cancel_work_sync(&ent->work)) {
+ ent->ret = -ECANCELED;
+ goto out_err;
+ }
+ if (cmd->mode == CMD_MODE_POLLING || ent->polling)
+ wait_for_completion(&ent->done);
+ else if (!wait_for_completion_timeout(&ent->done, timeout))
+ wait_func_handle_exec_timeout(dev, ent);
+
+out_err:
+ err = ent->ret;
+
+ if (err == -ETIMEDOUT) {
+ mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+ mlx5_command_str(ent->op), ent->op);
+ } else if (err == -ECANCELED) {
+ mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
+ mlx5_command_str(ent->op), ent->op);
+ }
+ mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
+ err, deliv_status_to_str(ent->status), ent->status);
+
+ return err;
+}
+
+/* Notes:
+ * 1. Callback functions may not sleep
+ * 2. Page queue commands do not support asynchronous completion
+ *
+ * return value in case (!callback):
+ * ret < 0 : Command execution couldn't be submitted by driver
+ * ret > 0 : Command execution couldn't be performed by firmware
+ * ret == 0: Command was executed by FW, Caller must check FW outbox status.
+ *
+ * return value in case (callback):
+ * ret < 0 : Command execution couldn't be submitted by driver
+ * ret == 0: Command will be submitted to FW for execution
+ * and the callback will be called for further status updates
+ */
+static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t callback,
+ void *context, int page_queue,
+ u8 token, bool force_polling)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct mlx5_cmd_work_ent *ent;
+ struct mlx5_cmd_stats *stats;
+ u8 status = 0;
+ int err = 0;
+ s64 ds;
+
+ if (callback && page_queue)
+ return -EINVAL;
+
+ ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
+ callback, context, page_queue);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ /* the put for this ent happens when it is consumed, depending on the use case:
+ * 1) (!callback) blocking flow: by caller after wait_func completes
+ * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled
+ */
+
+ ent->token = token;
+ ent->polling = force_polling;
+
+ init_completion(&ent->handling);
+ if (!callback)
+ init_completion(&ent->done);
+
+ INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
+ INIT_WORK(&ent->work, cmd_work_handler);
+ if (page_queue) {
+ cmd_work_handler(&ent->work);
+ } else if (!queue_work(cmd->wq, &ent->work)) {
+ mlx5_core_warn(dev, "failed to queue work\n");
+ err = -EALREADY;
+ goto out_free;
+ }
+
+ if (callback)
+ return 0; /* mlx5_cmd_comp_handler() will put(ent) */
+
+ err = wait_func(dev, ent);
+ if (err == -ETIMEDOUT || err == -ECANCELED)
+ goto out_free;
+
+ ds = ent->ts2 - ent->ts1;
+ if (ent->op < MLX5_CMD_OP_MAX) {
+ stats = &cmd->stats[ent->op];
+ spin_lock_irq(&stats->lock);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irq(&stats->lock);
+ }
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+ "fw exec time for %s is %lld nsec\n",
+ mlx5_command_str(ent->op), ds);
+
+out_free:
+ status = ent->status;
+ cmd_ent_put(ent);
+ return err ? : status;
+}
+
+static ssize_t dbg_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char lbuf[3];
+ int err;
+
+ if (!dbg->in_msg || !dbg->out_msg)
+ return -ENOMEM;
+
+ if (count < sizeof(lbuf) - 1)
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = 0;
+
+ if (strcmp(lbuf, "go"))
+ return -EINVAL;
+
+ err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen);
+
+ return err ? err : count;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = dbg_write,
+};
+
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
+ u8 token)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_mailbox *next;
+ int copy;
+
+ if (!to || !from)
+ return -ENOMEM;
+
+ copy = min_t(int, size, sizeof(to->first.data));
+ memcpy(to->first.data, from, copy);
+ size -= copy;
+ from += copy;
+
+ next = to->next;
+ while (size) {
+ if (!next) {
+ /* this is a BUG */
+ return -ENOMEM;
+ }
+
+ copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+ block = next->buf;
+ memcpy(block->data, from, copy);
+ from += copy;
+ size -= copy;
+ block->token = token;
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_mailbox *next;
+ int copy;
+
+ if (!to || !from)
+ return -ENOMEM;
+
+ copy = min_t(int, size, sizeof(from->first.data));
+ memcpy(to, from->first.data, copy);
+ size -= copy;
+ to += copy;
+
+ next = from->next;
+ while (size) {
+ if (!next) {
+ /* this is a BUG */
+ return -ENOMEM;
+ }
+
+ copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+ block = next->buf;
+
+ memcpy(to, block->data, copy);
+ to += copy;
+ size -= copy;
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
+ gfp_t flags)
+{
+ struct mlx5_cmd_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof(*mailbox), flags);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags,
+ &mailbox->dma);
+ if (!mailbox->buf) {
+ mlx5_core_dbg(dev, "failed allocation\n");
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+ mailbox->next = NULL;
+
+ return mailbox;
+}
+
+static void free_cmd_box(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_mailbox *mailbox)
+{
+ dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
+
+static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+ gfp_t flags, int size,
+ u8 token)
+{
+ struct mlx5_cmd_mailbox *tmp, *head = NULL;
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_msg *msg;
+ int err;
+ int n;
+ int i;
+
+ msg = kzalloc(sizeof(*msg), flags);
+ if (!msg)
+ return ERR_PTR(-ENOMEM);
+
+ msg->len = size;
+ n = mlx5_calc_cmd_blocks(msg);
+
+ for (i = 0; i < n; i++) {
+ tmp = alloc_cmd_box(dev, flags);
+ if (IS_ERR(tmp)) {
+ mlx5_core_warn(dev, "failed allocating block\n");
+ err = PTR_ERR(tmp);
+ goto err_alloc;
+ }
+
+ block = tmp->buf;
+ tmp->next = head;
+ block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
+ block->block_num = cpu_to_be32(n - i - 1);
+ block->token = token;
+ head = tmp;
+ }
+ msg->next = head;
+ return msg;
+
+err_alloc:
+ while (head) {
+ tmp = head->next;
+ free_cmd_box(dev, head);
+ head = tmp;
+ }
+ kfree(msg);
+
+ return ERR_PTR(err);
+}
+
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_msg *msg)
+{
+ struct mlx5_cmd_mailbox *head = msg->next;
+ struct mlx5_cmd_mailbox *next;
+
+ while (head) {
+ next = head->next;
+ free_cmd_box(dev, head);
+ head = next;
+ }
+ kfree(msg);
+}
+
+static ssize_t data_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ void *ptr;
+
+ if (*pos != 0)
+ return -EINVAL;
+
+ kfree(dbg->in_msg);
+ dbg->in_msg = NULL;
+ dbg->inlen = 0;
+ ptr = memdup_user(buf, count);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ dbg->in_msg = ptr;
+ dbg->inlen = count;
+
+ *pos = count;
+
+ return count;
+}
+
+static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+ if (!dbg->out_msg)
+ return -ENOMEM;
+
+ return simple_read_from_buffer(buf, count, pos, dbg->out_msg,
+ dbg->outlen);
+}
+
+static const struct file_operations dfops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = data_write,
+ .read = data_read,
+};
+
+static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char outlen[8];
+ int err;
+
+ err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
+ if (err < 0)
+ return err;
+
+ return simple_read_from_buffer(buf, count, pos, outlen, err);
+}
+
+static ssize_t outlen_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char outlen_str[8] = {0};
+ int outlen;
+ void *ptr;
+ int err;
+
+ if (*pos != 0 || count > 6)
+ return -EINVAL;
+
+ kfree(dbg->out_msg);
+ dbg->out_msg = NULL;
+ dbg->outlen = 0;
+
+ if (copy_from_user(outlen_str, buf, count))
+ return -EFAULT;
+
+ err = sscanf(outlen_str, "%d", &outlen);
+ if (err != 1)
+ return -EINVAL;
+
+ ptr = kzalloc(outlen, GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ dbg->out_msg = ptr;
+ dbg->outlen = outlen;
+
+ *pos = count;
+
+ return count;
+}
+
+static const struct file_operations olfops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = outlen_write,
+ .read = outlen_read,
+};
+
+static void set_wqname(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+
+ snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
+ dev_name(dev->device));
+}
+
+static void clean_debug_files(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+ if (!mlx5_debugfs_root)
+ return;
+
+ mlx5_cmdif_debugfs_cleanup(dev);
+ debugfs_remove_recursive(dbg->dbg_root);
+}
+
+static void create_debugfs_files(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+ dbg->dbg_root = debugfs_create_dir("cmd", mlx5_debugfs_get_dev_root(dev));
+
+ debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops);
+ debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops);
+ debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops);
+ debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status);
+ debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
+
+ mlx5_cmdif_debugfs_init(dev);
+}
+
+void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+ down(&cmd->vars.sem);
+ down(&cmd->vars.pages_sem);
+
+ cmd->allowed_opcode = opcode;
+
+ up(&cmd->vars.pages_sem);
+ for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+ up(&cmd->vars.sem);
+}
+
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+ down(&cmd->vars.sem);
+ down(&cmd->vars.pages_sem);
+
+ cmd->mode = mode;
+
+ up(&cmd->vars.pages_sem);
+ for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+ up(&cmd->vars.sem);
+}
+
+static int cmd_comp_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_core_dev *dev;
+ struct mlx5_cmd *cmd;
+ struct mlx5_eqe *eqe;
+
+ cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+ dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ eqe = data;
+
+ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+ return NOTIFY_OK;
+}
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+{
+ MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+ mlx5_eq_notifier_register(dev, &dev->cmd.nb);
+ mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
+
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+ mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+ mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
+}
+
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+ unsigned long flags;
+
+ if (msg->parent) {
+ spin_lock_irqsave(&msg->parent->lock, flags);
+ list_add_tail(&msg->list, &msg->parent->head);
+ spin_unlock_irqrestore(&msg->parent->lock, flags);
+ } else {
+ mlx5_free_cmd_msg(dev, msg);
+ }
+}
+
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct mlx5_cmd_work_ent *ent;
+ mlx5_cmd_cbk_t callback;
+ void *context;
+ int err;
+ int i;
+ s64 ds;
+ struct mlx5_cmd_stats *stats;
+ unsigned long flags;
+ unsigned long vector;
+
+ /* there can be at most 32 command queue entries */
+ vector = vec & 0xffffffff;
+ for (i = 0; i < (1 << cmd->vars.log_sz); i++) {
+ if (test_bit(i, &vector)) {
+ ent = cmd->ent_arr[i];
+
+ /* if we already completed the command, ignore it */
+ if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
+ &ent->state)) {
+ /* only real completion can free the cmd slot */
+ if (!forced) {
+ mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+ ent->idx);
+ cmd_ent_put(ent);
+ }
+ continue;
+ }
+
+ if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work))
+ cmd_ent_put(ent); /* timeout work was canceled */
+
+ if (!forced || /* Real FW completion */
+ mlx5_cmd_is_down(dev) || /* No real FW completion is expected */
+ !opcode_allowed(cmd, ent->op))
+ cmd_ent_put(ent);
+
+ ent->ts2 = ktime_get_ns();
+ memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
+ dump_command(dev, ent, 0);
+
+ if (vec & MLX5_TRIGGERED_CMD_COMP)
+ ent->ret = -ENXIO;
+
+ if (!ent->ret) { /* Command completed by FW */
+ if (!cmd->checksum_disabled)
+ ent->ret = verify_signature(ent);
+
+ ent->status = ent->lay->status_own >> 1;
+
+ mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
+ ent->ret, deliv_status_to_str(ent->status), ent->status);
+ }
+
+ if (ent->callback) {
+ ds = ent->ts2 - ent->ts1;
+ if (ent->op < MLX5_CMD_OP_MAX) {
+ stats = &cmd->stats[ent->op];
+ spin_lock_irqsave(&stats->lock, flags);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irqrestore(&stats->lock, flags);
+ }
+
+ callback = ent->callback;
+ context = ent->context;
+ err = ent->ret ? : ent->status;
+ if (err > 0) /* Failed in FW, command didn't execute */
+ err = deliv_status_to_err(err);
+
+ if (!err)
+ err = mlx5_copy_from_msg(ent->uout,
+ ent->out,
+ ent->uout_size);
+
+ mlx5_free_cmd_msg(dev, ent->out);
+ free_msg(dev, ent->in);
+
+ /* final consumer is done, release ent */
+ cmd_ent_put(ent);
+ callback(err, context);
+ } else {
+ /* release wait_func() so mlx5_cmd_invoke()
+ * can make the final ent_put()
+ */
+ complete(&ent->done);
+ }
+ }
+ }
+}
+
+static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ unsigned long bitmask;
+ unsigned long flags;
+ u64 vector;
+ int i;
+
+ /* wait for pending handlers to complete */
+ mlx5_eq_synchronize_cmd_irq(dev);
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.vars.bitmask & ((1ul << (1 << dev->cmd.vars.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ bitmask = vector;
+ /* we must increment the allocated entries refcount before triggering the completions
+ * to guarantee pending commands will not get freed in the meanwhile.
+ * For that reason, it also has to be done inside the alloc_lock.
+ */
+ for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz))
+ cmd_ent_get(cmd->ent_arr[i]);
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector, true);
+ for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz))
+ cmd_ent_put(cmd->ent_arr[i]);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+void mlx5_cmd_flush(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ for (i = 0; i < cmd->vars.max_reg_cmds; i++) {
+ while (down_trylock(&cmd->vars.sem)) {
+ mlx5_cmd_trigger_completions(dev);
+ cond_resched();
+ }
+ }
+
+ while (down_trylock(&cmd->vars.pages_sem)) {
+ mlx5_cmd_trigger_completions(dev);
+ cond_resched();
+ }
+
+ /* Unlock cmdif */
+ up(&cmd->vars.pages_sem);
+ for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+ up(&cmd->vars.sem);
+}
+
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
+ gfp_t gfp)
+{
+ struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+ struct cmd_msg_cache *ch = NULL;
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ if (in_size <= 16)
+ goto cache_miss;
+
+ for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ ch = &cmd->cache[i];
+ if (in_size > ch->max_inbox_size)
+ continue;
+ spin_lock_irq(&ch->lock);
+ if (list_empty(&ch->head)) {
+ spin_unlock_irq(&ch->lock);
+ continue;
+ }
+ msg = list_entry(ch->head.next, typeof(*msg), list);
+ /* For cached lists, we must explicitly state what is
+ * the real size
+ */
+ msg->len = in_size;
+ list_del(&msg->list);
+ spin_unlock_irq(&ch->lock);
+ break;
+ }
+
+ if (!IS_ERR(msg))
+ return msg;
+
+cache_miss:
+ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+ return msg;
+}
+
+static int is_manage_pages(void *in)
+{
+ return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES;
+}
+
+/* Notes:
+ * 1. Callback functions may not sleep
+ * 2. Page queue commands do not support asynchronous completion
+ */
+static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size, mlx5_cmd_cbk_t callback, void *context,
+ bool force_polling)
+{
+ struct mlx5_cmd_msg *inb, *outb;
+ u16 opcode = in_to_opcode(in);
+ bool throttle_op;
+ int pages_queue;
+ gfp_t gfp;
+ u8 token;
+ int err;
+
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode))
+ return -ENXIO;
+
+ throttle_op = mlx5_cmd_is_throttle_opcode(opcode);
+ if (throttle_op) {
+ /* atomic context may not sleep */
+ if (callback)
+ return -EINVAL;
+ down(&dev->cmd.vars.throttle_sem);
+ }
+
+ pages_queue = is_manage_pages(in);
+ gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
+
+ inb = alloc_msg(dev, in_size, gfp);
+ if (IS_ERR(inb)) {
+ err = PTR_ERR(inb);
+ goto out_up;
+ }
+
+ token = alloc_token(&dev->cmd);
+
+ err = mlx5_copy_to_msg(inb, in, in_size, token);
+ if (err) {
+ mlx5_core_warn(dev, "err %d\n", err);
+ goto out_in;
+ }
+
+ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
+ if (IS_ERR(outb)) {
+ err = PTR_ERR(outb);
+ goto out_in;
+ }
+
+ err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+ pages_queue, token, force_polling);
+ if (callback)
+ return err;
+
+ if (err > 0) /* Failed in FW, command didn't execute */
+ err = deliv_status_to_err(err);
+
+ if (err)
+ goto out_out;
+
+ /* command completed by FW */
+ err = mlx5_copy_from_msg(out, outb, out_size);
+out_out:
+ mlx5_free_cmd_msg(dev, outb);
+out_in:
+ free_msg(dev, inb);
+out_up:
+ if (throttle_op)
+ up(&dev->cmd.vars.throttle_sem);
+ return err;
+}
+
+static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out)
+{
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
+
+ trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod,
+ cmd_status_str(status), status, syndrome,
+ cmd_status_to_err(status));
+}
+
+static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
+ u32 syndrome, int err)
+{
+ const char *namep = mlx5_command_str(opcode);
+ struct mlx5_cmd_stats *stats;
+
+ if (!err || !(strcmp(namep, "unknown command opcode")))
+ return;
+
+ stats = &dev->cmd.stats[opcode];
+ spin_lock_irq(&stats->lock);
+ stats->failed++;
+ if (err < 0)
+ stats->last_failed_errno = -err;
+ if (err == -EREMOTEIO) {
+ stats->failed_mbox_status++;
+ stats->last_failed_mbox_status = status;
+ stats->last_failed_syndrome = syndrome;
+ }
+ spin_unlock_irq(&stats->lock);
+}
+
+/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
+static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out)
+{
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
+
+ if (err == -EREMOTEIO) /* -EREMOTEIO is reserved for outbox.status != OK */
+ err = -EIO;
+
+ if (!err && status != MLX5_CMD_STAT_OK) {
+ err = -EREMOTEIO;
+ mlx5_cmd_err_trace(dev, opcode, op_mod, out);
+ }
+
+ cmd_status_log(dev, opcode, status, syndrome, err);
+ return err;
+}
+
+/**
+ * mlx5_cmd_do - Execute a FW command and wait for completion.
+ * Unlike mlx5_cmd_exec, this function will not translate or intercept
+ * outbox.status and will return -EREMOTEIO when
+ * outbox.status != MLX5_CMD_STAT_OK
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return:
+ * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK.
+ * Caller must check FW outbox status.
+ * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK.
+ * < 0 : Command execution couldn't be performed by firmware or driver
+ */
+int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size)
+{
+ int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
+ u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
+ u16 opcode = in_to_opcode(in);
+
+ return cmd_status_err(dev, err, opcode, op_mod, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_do);
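+
+/* Usage sketch (illustrative only, not part of the upstream file): a caller
+ * that wants the raw FW status rather than a translated errno can call
+ * mlx5_cmd_do() and handle -EREMOTEIO itself, e.g.:
+ *
+ *     u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+ *     u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
+ *     int err;
+ *
+ *     MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ *     err = mlx5_cmd_do(dev, in, sizeof(in), out, sizeof(out));
+ *     if (err == -EREMOTEIO) {
+ *             u8 status = MLX5_GET(mbox_out, out, status);
+ *             u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ *             ... FW ran the command but outbox.status != OK ...
+ *     }
+ *
+ * Callers that only need the translated errno should use mlx5_cmd_exec()
+ * below instead.
+ */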
+
+/**
+ * mlx5_cmd_exec - Execute a FW command and wait for completion
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return: 0 if no error, FW command execution was successful
+ * and outbox status is ok.
+ */
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size)
+{
+ int err = mlx5_cmd_do(dev, in, in_size, out, out_size);
+
+ return mlx5_cmd_check(dev, err, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec);
+
+/**
+ * mlx5_cmd_exec_polling - Execute a FW command, polling for completion.
+ * Needed for driver force teardown, when the command completion EQ
+ * is not available to complete the command
+ *
+ * @dev: mlx5 core device
+ * @in: inbox mlx5_ifc command buffer
+ * @in_size: inbox buffer size
+ * @out: outbox mlx5_ifc buffer
+ * @out_size: outbox size
+ *
+ * @return: 0 if no error, FW command execution was successful
+ * and outbox status is ok.
+ */
+int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size)
+{
+ int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+ u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
+ u16 opcode = in_to_opcode(in);
+
+ err = cmd_status_err(dev, err, opcode, op_mod, out);
+ return mlx5_cmd_check(dev, err, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_polling);
+
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+ struct mlx5_async_ctx *ctx)
+{
+ ctx->dev = dev;
+ /* Starts at 1 to avoid doing wake_up if we are not cleaning up */
+ atomic_set(&ctx->num_inflight, 1);
+ init_completion(&ctx->inflight_done);
+}
+EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
+
+/**
+ * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx
+ * @ctx: The ctx to clean
+ *
+ * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The
+ * caller must ensure that mlx5_cmd_exec_cb() is not called during or after
+ * the call to mlx5_cmd_cleanup_async_ctx().
+ */
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->num_inflight))
+ wait_for_completion(&ctx->inflight_done);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
+
+static void mlx5_cmd_exec_cb_handler(int status, void *_work)
+{
+ struct mlx5_async_work *work = _work;
+ struct mlx5_async_ctx *ctx;
+
+ ctx = work->ctx;
+ status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out);
+ work->user_callback(status, work);
+ if (atomic_dec_and_test(&ctx->num_inflight))
+ complete(&ctx->inflight_done);
+}
+
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+ void *out, int out_size, mlx5_async_cbk_t callback,
+ struct mlx5_async_work *work)
+{
+ int ret;
+
+ work->ctx = ctx;
+ work->user_callback = callback;
+ work->opcode = in_to_opcode(in);
+ work->op_mod = MLX5_GET(mbox_in, in, op_mod);
+ work->out = out;
+ if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
+ return -EIO;
+ ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
+ mlx5_cmd_exec_cb_handler, work, false);
+ if (ret && atomic_dec_and_test(&ctx->num_inflight))
+ complete(&ctx->inflight_done);
+
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_cb);
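+
+/* Usage sketch (illustrative only, not part of the upstream file): the async
+ * API is used by initializing a context once, issuing commands against it,
+ * and draining it before teardown. my_callback, in and out are hypothetical:
+ *
+ *     struct mlx5_async_ctx ctx;
+ *     struct mlx5_async_work work;
+ *
+ *     mlx5_cmd_init_async_ctx(dev, &ctx);
+ *     err = mlx5_cmd_exec_cb(&ctx, in, sizeof(in), out, sizeof(out),
+ *                            my_callback, &work);
+ *     ...
+ *     mlx5_cmd_cleanup_async_ctx(&ctx); (returns only after my_callback ran)
+ *
+ * The work struct is typically embedded in a larger per-request structure
+ * and recovered in the callback with container_of().
+ */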
+
+static void destroy_msg_cache(struct mlx5_core_dev *dev)
+{
+ struct cmd_msg_cache *ch;
+ struct mlx5_cmd_msg *msg;
+ struct mlx5_cmd_msg *n;
+ int i;
+
+ for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ ch = &dev->cmd.cache[i];
+ list_for_each_entry_safe(msg, n, &ch->head, list) {
+ list_del(&msg->list);
+ mlx5_free_cmd_msg(dev, msg);
+ }
+ }
+}
+
+static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = {
+ 512, 32, 16, 8, 2
+};
+
+static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = {
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 2,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 16,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 256,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 512,
+};
+
+static void create_msg_cache(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct cmd_msg_cache *ch;
+ struct mlx5_cmd_msg *msg;
+ int i;
+ int k;
+
+ /* Initialize and fill the caches with initial entries */
+ for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+ ch = &cmd->cache[k];
+ spin_lock_init(&ch->lock);
+ INIT_LIST_HEAD(&ch->head);
+ ch->num_ent = cmd_cache_num_ent[k];
+ ch->max_inbox_size = cmd_cache_ent_size[k];
+ for (i = 0; i < ch->num_ent; i++) {
+ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN,
+ ch->max_inbox_size, 0);
+ if (IS_ERR(msg))
+ break;
+ msg->parent = ch;
+ list_add_tail(&msg->list, &ch->head);
+ }
+ }
+}
+
+static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+ cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE,
+ &cmd->alloc_dma, GFP_KERNEL);
+ if (!cmd->cmd_alloc_buf)
+ return -ENOMEM;
+
+ /* make sure it is aligned to 4K */
+ if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) {
+ cmd->cmd_buf = cmd->cmd_alloc_buf;
+ cmd->dma = cmd->alloc_dma;
+ cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE;
+ return 0;
+ }
+
+ dma_free_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
+ cmd->alloc_dma);
+ cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev),
+ 2 * MLX5_ADAPTER_PAGE_SIZE - 1,
+ &cmd->alloc_dma, GFP_KERNEL);
+ if (!cmd->cmd_alloc_buf)
+ return -ENOMEM;
+
+ cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE);
+ cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE);
+ cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1;
+ return 0;
+}
+
+static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+ dma_free_coherent(mlx5_core_dma_dev(dev), cmd->alloc_size, cmd->cmd_alloc_buf,
+ cmd->alloc_dma);
+}
+
+static u16 cmdif_rev(struct mlx5_core_dev *dev)
+{
+ return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+}
+
+int mlx5_cmd_init(struct mlx5_core_dev *dev)
+{
+ int size = sizeof(struct mlx5_cmd_prot_block);
+ int align = roundup_pow_of_two(size);
+ struct mlx5_cmd *cmd = &dev->cmd;
+ u32 cmd_h, cmd_l;
+ u16 cmd_if_rev;
+ int err;
+ int i;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd_if_rev = cmdif_rev(dev);
+ if (cmd_if_rev != CMD_IF_REV) {
+ mlx5_core_err(dev,
+ "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+ CMD_IF_REV, cmd_if_rev);
+ return -EINVAL;
+ }
+
+ cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
+ if (!cmd->pool)
+ return -ENOMEM;
+
+ err = alloc_cmd_page(dev, cmd);
+ if (err)
+ goto err_free_pool;
+
+ cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
+ cmd->vars.log_sz = cmd_l >> 4 & 0xf;
+ cmd->vars.log_stride = cmd_l & 0xf;
+ if (1 << cmd->vars.log_sz > MLX5_MAX_COMMANDS) {
+ mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n",
+ 1 << cmd->vars.log_sz);
+ err = -EINVAL;
+ goto err_free_page;
+ }
+
+ if (cmd->vars.log_sz + cmd->vars.log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
+ mlx5_core_err(dev, "command queue size overflow\n");
+ err = -EINVAL;
+ goto err_free_page;
+ }
+
+ cmd->state = MLX5_CMDIF_STATE_DOWN;
+ cmd->checksum_disabled = 1;
+ cmd->vars.max_reg_cmds = (1 << cmd->vars.log_sz) - 1;
+ cmd->vars.bitmask = (1UL << cmd->vars.max_reg_cmds) - 1;
+
+ cmd->vars.cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+ if (cmd->vars.cmdif_rev > CMD_IF_REV) {
+ mlx5_core_err(dev, "driver does not support command interface version. driver %d, firmware %d\n",
+ CMD_IF_REV, cmd->vars.cmdif_rev);
+ err = -EOPNOTSUPP;
+ goto err_free_page;
+ }
+
+ spin_lock_init(&cmd->alloc_lock);
+ spin_lock_init(&cmd->token_lock);
+ for (i = 0; i < MLX5_CMD_OP_MAX; i++)
+ spin_lock_init(&cmd->stats[i].lock);
+
+ sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds);
+ sema_init(&cmd->vars.pages_sem, 1);
+ sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2));
+
+ cmd_h = (u32)((u64)(cmd->dma) >> 32);
+ cmd_l = (u32)(cmd->dma);
+ if (cmd_l & 0xfff) {
+ mlx5_core_err(dev, "invalid command queue address\n");
+ err = -ENOMEM;
+ goto err_free_page;
+ }
+
+ iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
+ iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
+
+ /* Make sure firmware sees the complete address before we proceed */
+ wmb();
+
+ mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
+
+ cmd->mode = CMD_MODE_POLLING;
+ cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
+
+ create_msg_cache(dev);
+
+ set_wqname(dev);
+ cmd->wq = create_singlethread_workqueue(cmd->wq_name);
+ if (!cmd->wq) {
+ mlx5_core_err(dev, "failed to create command workqueue\n");
+ err = -ENOMEM;
+ goto err_cache;
+ }
+
+ create_debugfs_files(dev);
+
+ return 0;
+
+err_cache:
+ destroy_msg_cache(dev);
+
+err_free_page:
+ free_cmd_page(dev, cmd);
+
+err_free_pool:
+ dma_pool_destroy(cmd->pool);
+ return err;
+}
+
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+
+ clean_debug_files(dev);
+ destroy_workqueue(cmd->wq);
+ destroy_msg_cache(dev);
+ free_cmd_page(dev, cmd);
+ dma_pool_destroy(cmd->pool);
+}
+
+void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
+ enum mlx5_cmdif_state cmdif_state)
+{
+ dev->cmd.state = cmdif_state;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
new file mode 100644
index 000000000..4caa1b6f4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/cq.h>
+#include "mlx5_core.h"
+#include "lib/eq.h"
+
+#define TASKLET_MAX_TIME 2
+#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
+
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t)
+{
+ unsigned long flags;
+ unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
+ struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task);
+ struct mlx5_core_cq *mcq;
+ struct mlx5_core_cq *temp;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ list_splice_tail_init(&ctx->list, &ctx->process_list);
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ list_for_each_entry_safe(mcq, temp, &ctx->process_list,
+ tasklet_ctx.list) {
+ list_del_init(&mcq->tasklet_ctx.list);
+ mcq->tasklet_ctx.comp(mcq, NULL);
+ mlx5_cq_put(mcq);
+ if (time_after(jiffies, end))
+ break;
+ }
+
+ if (!list_empty(&ctx->process_list))
+ tasklet_schedule(&ctx->task);
+}
+
+static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
+ struct mlx5_eqe *eqe)
+{
+ unsigned long flags;
+ struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
+
+ spin_lock_irqsave(&tasklet_ctx->lock, flags);
+ /* If migrating CQs between EQs is ever implemented, note that this
+ * point must be synchronized: while a CQ is being migrated, completions
+ * may still arrive on the old EQ.
+ */
+ if (list_empty_careful(&cq->tasklet_ctx.list)) {
+ mlx5_cq_hold(cq);
+ list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
+ }
+ spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
+}
+
+/* Callers must verify outbox status in case of err */
+int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen, u32 *out, int outlen)
+{
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ c_eqn_or_apu_element);
+ u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
+ struct mlx5_eq_comp *eq;
+ int err;
+
+ eq = mlx5_eqn2comp_eq(dev, eqn);
+ if (IS_ERR(eq))
+ return PTR_ERR(eq);
+
+ memset(out, 0, outlen);
+ MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
+ err = mlx5_cmd_do(dev, in, inlen, out, outlen);
+ if (err)
+ return err;
+
+ cq->cqn = MLX5_GET(create_cq_out, out, cqn);
+ cq->cons_index = 0;
+ cq->arm_sn = 0;
+ cq->eq = eq;
+ cq->uid = MLX5_GET(create_cq_in, in, uid);
+ refcount_set(&cq->refcount, 1);
+ init_completion(&cq->free);
+ if (!cq->comp)
+ cq->comp = mlx5_add_cq_to_tasklet;
+ /* assuming CQ will be deleted before the EQ */
+ cq->tasklet_ctx.priv = &eq->tasklet_ctx;
+ INIT_LIST_HEAD(&cq->tasklet_ctx.list);
+
+ /* Add to comp EQ CQ tree to recv comp events */
+ err = mlx5_eq_add_cq(&eq->core, cq);
+ if (err)
+ goto err_cmd;
+
+ /* Add to async EQ CQ tree to recv async events */
+ err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
+ if (err)
+ goto err_cq_add;
+
+ cq->pid = current->pid;
+ err = mlx5_debug_cq_add(dev, cq);
+ if (err)
+ mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n",
+ cq->cqn);
+
+ cq->uar = dev->priv.uar;
+ cq->irqn = eq->core.irqn;
+
+ return 0;
+
+err_cq_add:
+ mlx5_eq_del_cq(&eq->core, cq);
+err_cmd:
+ MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+ MLX5_SET(destroy_cq_in, din, uid, cq->uid);
+ mlx5_cmd_exec_in(dev, destroy_cq, din);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_create_cq);
+
+/* outbox is checked and err val is normalized */
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen, u32 *out, int outlen)
+{
+ int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen);
+
+ return mlx5_cmd_check(dev, err, in, out);
+}
+EXPORT_SYMBOL(mlx5_core_create_cq);
+
+int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
+ int err;
+
+ mlx5_debug_cq_remove(dev, cq);
+
+ mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
+ mlx5_eq_del_cq(&cq->eq->core, cq);
+
+ MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+ MLX5_SET(destroy_cq_in, in, uid, cq->uid);
+ err = mlx5_cmd_exec_in(dev, destroy_cq, in);
+ if (err)
+ return err;
+
+ synchronize_irq(cq->irqn);
+ mlx5_cq_put(cq);
+ wait_for_completion(&cq->free);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_cq);
+
+int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {};
+
+ MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ);
+ MLX5_SET(query_cq_in, in, cqn, cq->cqn);
+ return mlx5_cmd_exec_inout(dev, query_cq, in, out);
+}
+EXPORT_SYMBOL(mlx5_core_query_cq);
+
+int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {};
+
+ MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+ MLX5_SET(modify_cq_in, in, uid, cq->uid);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_cq);
+
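+/* Convenience wrapper around MODIFY_CQ that updates only the event moderation
+ * parameters (cq_period and cq_max_count), selected via the modify field mask.
+ */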
+int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
+ struct mlx5_core_cq *cq,
+ u16 cq_period,
+ u16 cq_max_count)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {};
+ void *cqc;
+
+ MLX5_SET(modify_cq_in, in, cqn, cq->cqn);
+ cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, cq_period, cq_period);
+ MLX5_SET(cqc, cqc, cq_max_count, cq_max_count);
+ MLX5_SET(modify_cq_in, in,
+ modify_field_select_resize_field_select.modify_field_select.modify_field_select,
+ MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+
+ return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
+}
+EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
new file mode 100644
index 000000000..e0b0729e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -0,0 +1,574 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/eq.h"
+
+enum {
+ QP_PID,
+ QP_STATE,
+ QP_XPORT,
+ QP_MTU,
+ QP_N_RECV,
+ QP_RECV_SZ,
+ QP_N_SEND,
+ QP_LOG_PG_SZ,
+ QP_RQPN,
+};
+
+static char *qp_fields[] = {
+ [QP_PID] = "pid",
+ [QP_STATE] = "state",
+ [QP_XPORT] = "transport",
+ [QP_MTU] = "mtu",
+ [QP_N_RECV] = "num_recv",
+ [QP_RECV_SZ] = "rcv_wqe_sz",
+ [QP_N_SEND] = "num_send",
+ [QP_LOG_PG_SZ] = "log2_page_sz",
+ [QP_RQPN] = "remote_qpn",
+};
+
+enum {
+ EQ_NUM_EQES,
+ EQ_INTR,
+ EQ_LOG_PG_SZ,
+};
+
+static char *eq_fields[] = {
+ [EQ_NUM_EQES] = "num_eqes",
+ [EQ_INTR] = "intr",
+ [EQ_LOG_PG_SZ] = "log_page_size",
+};
+
+enum {
+ CQ_PID,
+ CQ_NUM_CQES,
+ CQ_LOG_PG_SZ,
+};
+
+static char *cq_fields[] = {
+ [CQ_PID] = "pid",
+ [CQ_NUM_CQES] = "num_cqes",
+ [CQ_LOG_PG_SZ] = "log_page_size",
+};
+
+struct dentry *mlx5_debugfs_root;
+EXPORT_SYMBOL(mlx5_debugfs_root);
+
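+/* Top-level "mlx5" directory under the kernel debugfs root; per-device
+ * directories hang below it and hold the QPs, EQs, CQs, commands and pages
+ * subtrees created by the helpers in this file.
+ */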
+void mlx5_register_debugfs(void)
+{
+ mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
+}
+
+void mlx5_unregister_debugfs(void)
+{
+ debugfs_remove(mlx5_debugfs_root);
+}
+
+struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev)
+{
+ return dev->priv.dbg.dbg_root;
+}
+EXPORT_SYMBOL(mlx5_debugfs_get_dev_root);
+
+void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+{
+ dev->priv.dbg.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg.dbg_root);
+}
+EXPORT_SYMBOL(mlx5_qp_debugfs_init);
+
+void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ debugfs_remove_recursive(dev->priv.dbg.qp_debugfs);
+}
+EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup);
+
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+{
+ dev->priv.dbg.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg.dbg_root);
+}
+
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ debugfs_remove_recursive(dev->priv.dbg.eq_debugfs);
+}
+
+static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cmd_stats *stats;
+ u64 field = 0;
+ int ret;
+ char tbuf[22];
+
+ stats = filp->private_data;
+ spin_lock_irq(&stats->lock);
+ if (stats->n)
+ field = div64_u64(stats->sum, stats->n);
+ spin_unlock_irq(&stats->lock);
+ ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static ssize_t average_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_cmd_stats *stats;
+
+ stats = filp->private_data;
+ spin_lock_irq(&stats->lock);
+ stats->sum = 0;
+ stats->n = 0;
+ spin_unlock_irq(&stats->lock);
+
+ *pos += count;
+
+ return count;
+}
+
+static const struct file_operations stats_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = average_read,
+ .write = average_write,
+};
+
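+/* "slots_inuse": number of command slots currently in use, i.e. the total
+ * number of regular command slots minus the free entries still set in the
+ * command bitmask.
+ */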
+static ssize_t slots_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cmd *cmd;
+ char tbuf[6];
+ int weight;
+ int field;
+ int ret;
+
+ cmd = filp->private_data;
+ weight = bitmap_weight(&cmd->vars.bitmask, cmd->vars.max_reg_cmds);
+ field = cmd->vars.max_reg_cmds - weight;
+ ret = snprintf(tbuf, sizeof(tbuf), "%d\n", field);
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static const struct file_operations slots_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = slots_read,
+};
+
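+/* "commands" directory: a "slots_inuse" file plus one subdirectory per known
+ * command opcode exposing its average execution time, counters and details of
+ * the last failure; unrecognized opcodes are skipped.
+ */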
+void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_stats *stats;
+ struct dentry **cmd;
+ const char *namep;
+ int i;
+
+ cmd = &dev->priv.dbg.cmdif_debugfs;
+ *cmd = debugfs_create_dir("commands", dev->priv.dbg.dbg_root);
+
+ debugfs_create_file("slots_inuse", 0400, *cmd, &dev->cmd, &slots_fops);
+
+ for (i = 0; i < MLX5_CMD_OP_MAX; i++) {
+ stats = &dev->cmd.stats[i];
+ namep = mlx5_command_str(i);
+ if (strcmp(namep, "unknown command opcode")) {
+ stats->root = debugfs_create_dir(namep, *cmd);
+
+ debugfs_create_file("average", 0400, stats->root, stats,
+ &stats_fops);
+ debugfs_create_u64("n", 0400, stats->root, &stats->n);
+ debugfs_create_u64("failed", 0400, stats->root, &stats->failed);
+ debugfs_create_u64("failed_mbox_status", 0400, stats->root,
+ &stats->failed_mbox_status);
+ debugfs_create_u32("last_failed_errno", 0400, stats->root,
+ &stats->last_failed_errno);
+ debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
+ &stats->last_failed_mbox_status);
+ debugfs_create_x32("last_failed_syndrome", 0400, stats->root,
+ &stats->last_failed_syndrome);
+ }
+ }
+}
+
+void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs);
+}
+
+void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+{
+ dev->priv.dbg.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg.dbg_root);
+}
+
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ debugfs_remove_recursive(dev->priv.dbg.cq_debugfs);
+}
+
+void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev)
+{
+ struct dentry *pages;
+
+ dev->priv.dbg.pages_debugfs = debugfs_create_dir("pages", dev->priv.dbg.dbg_root);
+ pages = dev->priv.dbg.pages_debugfs;
+
+ debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
+ debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]);
+ debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]);
+ debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]);
+ debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed);
+ debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped);
+ debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages,
+ &dev->priv.reclaim_pages_discard);
+}
+
+void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ debugfs_remove_recursive(dev->priv.dbg.pages_debugfs);
+}
+
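+/* The *_read_field() helpers below query the resource from firmware and pick
+ * the requested attribute out of the returned context; on error they return 0
+ * so the debugfs read itself still succeeds.
+ */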
+static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+ int index, int *is_str)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
+ u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {};
+ u64 param = 0;
+ u32 *out;
+ int state;
+ u32 *qpc;
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return 0;
+
+ MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+ MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+ err = mlx5_cmd_exec_inout(dev, query_qp, in, out);
+ if (err)
+ goto out;
+
+ *is_str = 0;
+
+ qpc = MLX5_ADDR_OF(query_qp_out, out, qpc);
+ switch (index) {
+ case QP_PID:
+ param = qp->pid;
+ break;
+ case QP_STATE:
+ state = MLX5_GET(qpc, qpc, state);
+ param = (unsigned long)mlx5_qp_state_str(state);
+ *is_str = 1;
+ break;
+ case QP_XPORT:
+ param = (unsigned long)mlx5_qp_type_str(MLX5_GET(qpc, qpc, st));
+ *is_str = 1;
+ break;
+ case QP_MTU:
+ switch (MLX5_GET(qpc, qpc, mtu)) {
+ case IB_MTU_256:
+ param = 256;
+ break;
+ case IB_MTU_512:
+ param = 512;
+ break;
+ case IB_MTU_1024:
+ param = 1024;
+ break;
+ case IB_MTU_2048:
+ param = 2048;
+ break;
+ case IB_MTU_4096:
+ param = 4096;
+ break;
+ default:
+ param = 0;
+ }
+ break;
+ case QP_N_RECV:
+ param = 1 << MLX5_GET(qpc, qpc, log_rq_size);
+ break;
+ case QP_RECV_SZ:
+ param = 1 << (MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+ break;
+ case QP_N_SEND:
+ if (!MLX5_GET(qpc, qpc, no_sq))
+ param = 1 << MLX5_GET(qpc, qpc, log_sq_size);
+ break;
+ case QP_LOG_PG_SZ:
+ param = MLX5_GET(qpc, qpc, log_page_size) + 12;
+ break;
+ case QP_RQPN:
+ param = MLX5_GET(qpc, qpc, remote_qpn);
+ break;
+ }
+out:
+ kfree(out);
+ return param;
+}
+
+static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ int index)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+ u64 param = 0;
+ void *ctx;
+ u32 *out;
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ err = mlx5_cmd_exec_inout(dev, query_eq, in, out);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query eq\n");
+ goto out;
+ }
+ ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);
+
+ switch (index) {
+ case EQ_NUM_EQES:
+ param = 1 << MLX5_GET(eqc, ctx, log_eq_size);
+ break;
+ case EQ_INTR:
+ param = MLX5_GET(eqc, ctx, intr);
+ break;
+ case EQ_LOG_PG_SZ:
+ param = MLX5_GET(eqc, ctx, log_page_size) + 12;
+ break;
+ }
+
+out:
+ kfree(out);
+ return param;
+}
+
+static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ int index)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cq_out);
+ u64 param = 0;
+ void *ctx;
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ err = mlx5_core_query_cq(dev, cq, out);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query cq\n");
+ goto out;
+ }
+ ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+
+ switch (index) {
+ case CQ_PID:
+ param = cq->pid;
+ break;
+ case CQ_NUM_CQES:
+ param = 1 << MLX5_GET(cqc, ctx, log_cq_size);
+ break;
+ case CQ_LOG_PG_SZ:
+ param = MLX5_GET(cqc, ctx, log_page_size);
+ break;
+ }
+
+out:
+ kvfree(out);
+ return param;
+}
+
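+/* Each debugfs file's private_data points at one mlx5_field_desc inside the
+ * fields[] array that trails struct mlx5_rsc_debug; dbg_read() walks back
+ * desc->i entries and then past the struct header to recover the owning
+ * mlx5_rsc_debug, which identifies the device, object and resource type.
+ */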
+static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_field_desc *desc;
+ struct mlx5_rsc_debug *d;
+ char tbuf[18];
+ int is_str = 0;
+ u64 field;
+ int ret;
+
+ desc = filp->private_data;
+ d = (void *)(desc - desc->i) - sizeof(*d);
+ switch (d->type) {
+ case MLX5_DBG_RSC_QP:
+ field = qp_read_field(d->dev, d->object, desc->i, &is_str);
+ break;
+
+ case MLX5_DBG_RSC_EQ:
+ field = eq_read_field(d->dev, d->object, desc->i);
+ break;
+
+ case MLX5_DBG_RSC_CQ:
+ field = cq_read_field(d->dev, d->object, desc->i);
+ break;
+
+ default:
+ mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type);
+ return -EINVAL;
+ }
+
+ if (is_str)
+ ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field);
+ else
+ ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
+
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = dbg_read,
+};
+
+static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
+ struct dentry *root, struct mlx5_rsc_debug **dbg,
+ int rsn, char **field, int nfile, void *data)
+{
+ struct mlx5_rsc_debug *d;
+ char resn[32];
+ int i;
+
+ d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->dev = dev;
+ d->object = data;
+ d->type = type;
+ sprintf(resn, "0x%x", rsn);
+ d->root = debugfs_create_dir(resn, root);
+
+ for (i = 0; i < nfile; i++) {
+ d->fields[i].i = i;
+ debugfs_create_file(field[i], 0400, d->root, &d->fields[i],
+ &fops);
+ }
+ *dbg = d;
+
+ return 0;
+}
+
+static void rem_res_tree(struct mlx5_rsc_debug *d)
+{
+ debugfs_remove_recursive(d->root);
+ kfree(d);
+}
+
+int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.dbg.qp_debugfs,
+ &qp->dbg, qp->qpn, qp_fields,
+ ARRAY_SIZE(qp_fields), qp);
+ if (err)
+ qp->dbg = NULL;
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_debug_qp_add);
+
+void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (qp->dbg)
+ rem_res_tree(qp->dbg);
+}
+EXPORT_SYMBOL(mlx5_debug_qp_remove);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.dbg.eq_debugfs,
+ &eq->dbg, eq->eqn, eq_fields,
+ ARRAY_SIZE(eq_fields), eq);
+ if (err)
+ eq->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (eq->dbg)
+ rem_res_tree(eq->dbg);
+}
+
+int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.dbg.cq_debugfs,
+ &cq->dbg, cq->cqn, cq_fields,
+ ARRAY_SIZE(cq_fields), cq);
+ if (err)
+ cq->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (cq->dbg) {
+ rem_res_tree(cq->dbg);
+ cq->dbg = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
new file mode 100644
index 000000000..02bb9d43f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -0,0 +1,648 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/mlx5_ifc_vdpa.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+
+/* intf dev list mutex */
+static DEFINE_MUTEX(mlx5_intf_mutex);
+static DEFINE_IDA(mlx5_adev_ida);
+
+static bool is_eth_rep_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_ESWITCH))
+ return false;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return false;
+
+ if (!is_mdev_switchdev_mode(dev))
+ return false;
+
+ return true;
+}
+
+bool mlx5_eth_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
+ return false;
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return false;
+
+ if (!MLX5_CAP_GEN(dev, eth_net_offloads)) {
+ mlx5_core_warn(dev, "Missing eth_net_offloads capability\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_GEN(dev, nic_flow_table)) {
+ mlx5_core_warn(dev, "Missing nic_flow_table capability\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_ETH(dev, csum_cap)) {
+ mlx5_core_warn(dev, "Missing csum_cap capability\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_ETH(dev, max_lso_cap)) {
+ mlx5_core_warn(dev, "Missing max_lso_cap capability\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_ETH(dev, vlan_cap)) {
+ mlx5_core_warn(dev, "Missing vlan_cap capability\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_ETH(dev, rss_ind_tbl_cap)) {
+ mlx5_core_warn(dev, "Missing rss_ind_tbl_cap capability\n");
+ return false;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.max_ft_level) < 3) {
+ mlx5_core_warn(dev, "max_ft_level < 3\n");
+ return false;
+ }
+
+ if (!MLX5_CAP_ETH(dev, self_lb_en_modifiable))
+ mlx5_core_warn(dev, "Self loop back prevention is not supported\n");
+ if (!MLX5_CAP_GEN(dev, cq_moderation))
+ mlx5_core_warn(dev, "CQ moderation is not supported\n");
+
+ return true;
+}
+
+static bool is_eth_enabled(struct mlx5_core_dev *dev)
+{
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ &val);
+ return err ? false : val.vbool;
+}
+
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
+ return false;
+
+ if (mlx5_core_is_pf(dev))
+ return false;
+
+ if (!(MLX5_CAP_GEN_64(dev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q))
+ return false;
+
+ if (!(MLX5_CAP_DEV_VDPA_EMULATION(dev, event_mode) &
+ MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE))
+ return false;
+
+ if (!MLX5_CAP_DEV_VDPA_EMULATION(dev, eth_frame_offload_type))
+ return false;
+
+ return true;
+}
+
+static bool is_vnet_enabled(struct mlx5_core_dev *dev)
+{
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ &val);
+ return err ? false : val.vbool;
+}
+
+static bool is_ib_rep_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return false;
+
+ if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
+ return false;
+
+ if (!is_eth_rep_supported(dev))
+ return false;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return false;
+
+ if (!is_mdev_switchdev_mode(dev))
+ return false;
+
+ if (mlx5_core_mp_enabled(dev))
+ return false;
+
+ return true;
+}
+
+static bool is_mp_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return false;
+
+ if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
+ return false;
+
+ if (is_ib_rep_supported(dev))
+ return false;
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return false;
+
+ if (!mlx5_core_is_mp_slave(dev))
+ return false;
+
+ return true;
+}
+
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return false;
+
+ if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
+ return false;
+
+ if (is_ib_rep_supported(dev))
+ return false;
+
+ if (is_mp_supported(dev))
+ return false;
+
+ return true;
+}
+
+static bool is_ib_enabled(struct mlx5_core_dev *dev)
+{
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ &val);
+ return err ? false : val.vbool;
+}
+
+enum {
+ MLX5_INTERFACE_PROTOCOL_ETH,
+ MLX5_INTERFACE_PROTOCOL_ETH_REP,
+
+ MLX5_INTERFACE_PROTOCOL_IB,
+ MLX5_INTERFACE_PROTOCOL_IB_REP,
+ MLX5_INTERFACE_PROTOCOL_MPIB,
+
+ MLX5_INTERFACE_PROTOCOL_VNET,
+};
+
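+/* Auxiliary device table: each entry provides the device name suffix together
+ * with the capability check (is_supported) and, where applicable, the devlink
+ * knob (is_enabled) that gate creation of that auxiliary device.
+ */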
+static const struct mlx5_adev_device {
+ const char *suffix;
+ bool (*is_supported)(struct mlx5_core_dev *dev);
+ bool (*is_enabled)(struct mlx5_core_dev *dev);
+} mlx5_adev_devices[] = {
+ [MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet",
+ .is_supported = &mlx5_vnet_supported,
+ .is_enabled = &is_vnet_enabled },
+ [MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma",
+ .is_supported = &mlx5_rdma_supported,
+ .is_enabled = &is_ib_enabled },
+ [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
+ .is_supported = &mlx5_eth_supported,
+ .is_enabled = &is_eth_enabled },
+ [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
+ .is_supported = &is_eth_rep_supported },
+ [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
+ .is_supported = &is_ib_rep_supported },
+ [MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport",
+ .is_supported = &is_mp_supported },
+};
+
+int mlx5_adev_idx_alloc(void)
+{
+ return ida_alloc(&mlx5_adev_ida, GFP_KERNEL);
+}
+
+void mlx5_adev_idx_free(int idx)
+{
+ ida_free(&mlx5_adev_ida, idx);
+}
+
+int mlx5_adev_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
+ priv->adev = kcalloc(ARRAY_SIZE(mlx5_adev_devices),
+ sizeof(struct mlx5_adev *), GFP_KERNEL);
+ if (!priv->adev)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_adev_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
+ kfree(priv->adev);
+}
+
+static void adev_release(struct device *dev)
+{
+ struct mlx5_adev *mlx5_adev =
+ container_of(dev, struct mlx5_adev, adev.dev);
+ struct mlx5_priv *priv = &mlx5_adev->mdev->priv;
+ int idx = mlx5_adev->idx;
+
+ kfree(mlx5_adev);
+ priv->adev[idx] = NULL;
+}
+
+static struct mlx5_adev *add_adev(struct mlx5_core_dev *dev, int idx)
+{
+ const char *suffix = mlx5_adev_devices[idx].suffix;
+ struct auxiliary_device *adev;
+ struct mlx5_adev *madev;
+ int ret;
+
+ madev = kzalloc(sizeof(*madev), GFP_KERNEL);
+ if (!madev)
+ return ERR_PTR(-ENOMEM);
+
+ adev = &madev->adev;
+ adev->id = dev->priv.adev_idx;
+ adev->name = suffix;
+ adev->dev.parent = dev->device;
+ adev->dev.release = adev_release;
+ madev->mdev = dev;
+ madev->idx = idx;
+
+ ret = auxiliary_device_init(adev);
+ if (ret) {
+ kfree(madev);
+ return ERR_PTR(ret);
+ }
+
+ ret = auxiliary_device_add(adev);
+ if (ret) {
+ auxiliary_device_uninit(adev);
+ return ERR_PTR(ret);
+ }
+ return madev;
+}
+
+static void del_adev(struct auxiliary_device *adev)
+{
+ auxiliary_device_delete(adev);
+ auxiliary_device_uninit(adev);
+}
+
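+/* Attach creates the missing auxiliary devices (subject to the is_supported/
+ * is_enabled checks) or resumes already-bound auxiliary drivers; detach below
+ * walks the table in reverse and either suspends the bound drivers or deletes
+ * the auxiliary devices outright.
+ */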
+int mlx5_attach_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct auxiliary_device *adev;
+ struct auxiliary_driver *adrv;
+ int ret = 0, i;
+
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&mlx5_intf_mutex);
+ priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
+ if (!priv->adev[i]) {
+ bool is_supported = false;
+
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ continue;
+ }
+
+ if (mlx5_adev_devices[i].is_supported)
+ is_supported = mlx5_adev_devices[i].is_supported(dev);
+
+ if (!is_supported)
+ continue;
+
+ priv->adev[i] = add_adev(dev, i);
+ if (IS_ERR(priv->adev[i])) {
+ ret = PTR_ERR(priv->adev[i]);
+ priv->adev[i] = NULL;
+ }
+ } else {
+ adev = &priv->adev[i]->adev;
+
+			/* Note that this is not the PCI driver that
+			 * mlx5_core_dev is bound to, but an auxiliary driver.
+			 *
+			 * Module unload can race with devlink reload here, but
+			 * no extra lock is needed because the global
+			 * mlx5_intf_mutex is held.
+			 */
+ if (!adev->dev.driver)
+ continue;
+ adrv = to_auxiliary_drv(adev->dev.driver);
+
+ if (adrv->resume)
+ ret = adrv->resume(adev);
+ }
+ if (ret) {
+ mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n",
+ i, mlx5_adev_devices[i].suffix);
+
+ break;
+ }
+ }
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ mutex_unlock(&mlx5_intf_mutex);
+ return ret;
+}
+
+void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct auxiliary_device *adev;
+ struct auxiliary_driver *adrv;
+ pm_message_t pm = {};
+ int i;
+
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&mlx5_intf_mutex);
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
+ if (!priv->adev[i])
+ continue;
+
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ goto skip_suspend;
+ }
+
+ adev = &priv->adev[i]->adev;
+		/* Auxiliary driver was unbound manually through sysfs */
+ if (!adev->dev.driver)
+ goto skip_suspend;
+
+ adrv = to_auxiliary_drv(adev->dev.driver);
+
+ if (adrv->suspend && suspend) {
+ adrv->suspend(adev, pm);
+ continue;
+ }
+
+skip_suspend:
+ del_adev(&priv->adev[i]->adev);
+ priv->adev[i] = NULL;
+ }
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ priv->flags |= MLX5_PRIV_FLAGS_DETACH;
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_register_device(struct mlx5_core_dev *dev)
+{
+ int ret;
+
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&mlx5_intf_mutex);
+ dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
+ ret = mlx5_rescan_drivers_locked(dev);
+ mutex_unlock(&mlx5_intf_mutex);
+ if (ret)
+ mlx5_unregister_device(dev);
+
+ return ret;
+}
+
+void mlx5_unregister_device(struct mlx5_core_dev *dev)
+{
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&mlx5_intf_mutex);
+ dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
+ mlx5_rescan_drivers_locked(dev);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+static int add_drivers(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int i, ret = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
+ bool is_supported = false;
+
+ if (priv->adev[i])
+ continue;
+
+ if (mlx5_adev_devices[i].is_supported)
+ is_supported = mlx5_adev_devices[i].is_supported(dev);
+
+ if (!is_supported)
+ continue;
+
+ priv->adev[i] = add_adev(dev, i);
+ if (IS_ERR(priv->adev[i])) {
+ mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n",
+ i, mlx5_adev_devices[i].suffix);
+			/* We continue to rescan drivers and leave it to the
+			 * caller to decide whether to release everything or
+			 * continue.
+			 */
+ ret = PTR_ERR(priv->adev[i]);
+ priv->adev[i] = NULL;
+ }
+ }
+ return ret;
+}
+
+static void delete_drivers(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ bool delete_all;
+ int i;
+
+ delete_all = priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
+
+ for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
+ bool is_supported = false;
+
+ if (!priv->adev[i])
+ continue;
+
+ if (mlx5_adev_devices[i].is_enabled) {
+ bool enabled;
+
+ enabled = mlx5_adev_devices[i].is_enabled(dev);
+ if (!enabled)
+ goto del_adev;
+ }
+
+ if (mlx5_adev_devices[i].is_supported && !delete_all)
+ is_supported = mlx5_adev_devices[i].is_supported(dev);
+
+ if (is_supported)
+ continue;
+
+del_adev:
+ del_adev(&priv->adev[i]->adev);
+ priv->adev[i] = NULL;
+ }
+}
+
+/* This function is used after mlx5_core_dev is reconfigured.
+ */
+int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int err = 0;
+
+ lockdep_assert_held(&mlx5_intf_mutex);
+ if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
+ return 0;
+
+ priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ delete_drivers(dev);
+ if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ goto out;
+
+ err = add_drivers(dev);
+
+out:
+ priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
+ return err;
+}
+
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
+{
+ u64 fsystem_guid, psystem_guid;
+
+ fsystem_guid = mlx5_query_nic_system_image_guid(dev);
+ psystem_guid = mlx5_query_nic_system_image_guid(peer_dev);
+
+ return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid);
+}
+
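+/* Build an identifier from PCI domain/bus/slot only; the function number is
+ * deliberately excluded so that all functions of the same physical device
+ * compare equal.
+ */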
+static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev)
+{
+ return (u32)((pci_domain_nr(dev->pdev->bus) << 16) |
+ (dev->pdev->bus->number << 8) |
+ PCI_SLOT(dev->pdev->devfn));
+}
+
+static int _next_phys_dev(struct mlx5_core_dev *mdev,
+ const struct mlx5_core_dev *curr)
+{
+ if (!mlx5_core_is_pf(mdev))
+ return 0;
+
+ if (mdev == curr)
+ return 0;
+
+ if (!mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) &&
+ mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr))
+ return 0;
+
+ return 1;
+}
+
+static void *pci_get_other_drvdata(struct device *this, struct device *other)
+{
+ if (this->driver != other->driver)
+ return NULL;
+
+ return pci_get_drvdata(to_pci_dev(other));
+}
+
+static int next_phys_dev_lag(struct device *dev, const void *data)
+{
+ struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
+
+ mdev = pci_get_other_drvdata(this->device, dev);
+ if (!mdev)
+ return 0;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager) ||
+ !MLX5_CAP_GEN(mdev, lag_master) ||
+ (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(mdev, num_lag_ports) <= 1))
+ return 0;
+
+ return _next_phys_dev(mdev, data);
+}
+
+static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev,
+ int (*match)(struct device *dev, const void *data))
+{
+ struct device *next;
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ next = bus_find_device(&pci_bus_type, NULL, dev, match);
+ if (!next)
+ return NULL;
+
+ put_device(next);
+ return pci_get_drvdata(to_pci_dev(next));
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
+{
+ lockdep_assert_held(&mlx5_intf_mutex);
+ return mlx5_get_next_dev(dev, &next_phys_dev_lag);
+}
+
+void mlx5_dev_list_lock(void)
+{
+ mutex_lock(&mlx5_intf_mutex);
+}
+void mlx5_dev_list_unlock(void)
+{
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_dev_list_trylock(void)
+{
+ return mutex_trylock(&mlx5_intf_mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
new file mode 100644
index 000000000..3749eb83d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -0,0 +1,918 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <devlink.h>
+
+#include "mlx5_core.h"
+#include "fw_reset.h"
+#include "fs_core.h"
+#include "eswitch.h"
+#include "esw/qos.h"
+#include "sf/dev/dev.h"
+#include "sf/sf.h"
+
+static int mlx5_devlink_flash_update(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ return mlx5_firmware_flash(dev, params->fw, extack);
+}
+
+static u8 mlx5_fw_ver_major(u32 version)
+{
+ return (version >> 24) & 0xff;
+}
+
+static u8 mlx5_fw_ver_minor(u32 version)
+{
+ return (version >> 16) & 0xff;
+}
+
+static u16 mlx5_fw_ver_subminor(u32 version)
+{
+ return version & 0xffff;
+}
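+
+/* The packed 32-bit firmware version decodes as major.minor.subminor; for
+ * example, 0x10231204 is reported as "16.35.4612".
+ */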
+
+#define DEVLINK_FW_STRING_LEN 32
+
+static int
+mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ char version_str[DEVLINK_FW_STRING_LEN];
+ u32 running_fw, stored_fw;
+ int err;
+
+ err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+ if (err)
+ return err;
+
+ err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
+ if (err)
+ return err;
+
+ err = mlx5_fw_version_query(dev, &running_fw, &stored_fw);
+ if (err)
+ return err;
+
+ snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+ mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw),
+ mlx5_fw_ver_subminor(running_fw));
+ err = devlink_info_version_running_put(req, "fw.version", version_str);
+ if (err)
+ return err;
+ err = devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ version_str);
+ if (err)
+ return err;
+
+	/* no pending version, report the running version as the stored one */
+ if (stored_fw == 0)
+ stored_fw = running_fw;
+
+ snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+ mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw),
+ mlx5_fw_ver_subminor(stored_fw));
+ err = devlink_info_version_stored_put(req, "fw.version", version_str);
+ if (err)
+ return err;
+ return devlink_info_version_stored_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ version_str);
+}
+
+static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 reset_level, reset_type, net_port_alive;
+ int err;
+
+ err = mlx5_fw_reset_query(dev, &reset_level, &reset_type);
+ if (err)
+ return err;
+ if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) {
+ NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot");
+ return -EINVAL;
+ }
+
+ net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE);
+ err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack);
+ if (err)
+ return err;
+
+ err = mlx5_fw_reset_wait_reset_done(dev);
+ if (err)
+ return err;
+
+ mlx5_unload_one_devl_locked(dev, true);
+ err = mlx5_health_wait_pci_up(dev);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset");
+
+ return err;
+}
+
+static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 reset_level;
+ int err;
+
+ err = mlx5_fw_reset_query(dev, &reset_level, NULL);
+ if (err)
+ return err;
+ if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL0)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "FW upgrade to the stored FW can't be done by FW live patching");
+ return -EINVAL;
+ }
+
+ return mlx5_fw_reset_set_live_patch(dev);
+}
+
+static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct pci_dev *pdev = dev->pdev;
+ bool sf_dev_allocated;
+ int ret = 0;
+
+ sf_dev_allocated = mlx5_sf_dev_allocated(dev);
+ if (sf_dev_allocated) {
+		/* Reload results in deleting the SF device, which in turn
+		 * unregisters the devlink instance while devlink_mutex is
+		 * held. Hence, reload is not supported.
+		 */
+ NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated");
+ return -EOPNOTSUPP;
+ }
+
+ if (mlx5_lag_is_active(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
+ return -EOPNOTSUPP;
+ }
+
+ if (pci_num_vf(pdev)) {
+ NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable");
+ }
+
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ mlx5_unload_one_devl_locked(dev, false);
+ break;
+ case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+ if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+ ret = mlx5_devlink_trigger_fw_live_patch(devlink, extack);
+ else
+ ret = mlx5_devlink_reload_fw_activate(devlink, extack);
+ break;
+ default:
+ /* Unsupported action should not get to this function */
+ WARN_ON(1);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+ enum devlink_reload_limit limit, u32 *actions_performed,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int ret = 0;
+
+ *actions_performed = BIT(action);
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ ret = mlx5_load_one_devl_locked(dev, false);
+ break;
+ case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+ if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+ break;
+		/* On fw_activate action, the driver is also reloaded and re-initialized */
+ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
+ ret = mlx5_load_one_devl_locked(dev, true);
+ break;
+ default:
+ /* Unsupported action should not get to this function */
+ WARN_ON(1);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id)
+{
+ struct mlx5_devlink_trap *dl_trap;
+
+ list_for_each_entry(dl_trap, &dev->priv.traps, list)
+ if (dl_trap->trap.id == trap_id)
+ return dl_trap;
+
+ return NULL;
+}
+
+static int mlx5_devlink_trap_init(struct devlink *devlink, const struct devlink_trap *trap,
+ void *trap_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_devlink_trap *dl_trap;
+
+ dl_trap = kzalloc(sizeof(*dl_trap), GFP_KERNEL);
+ if (!dl_trap)
+ return -ENOMEM;
+
+ dl_trap->trap.id = trap->id;
+ dl_trap->trap.action = DEVLINK_TRAP_ACTION_DROP;
+ dl_trap->item = trap_ctx;
+
+ if (mlx5_find_trap_by_id(dev, trap->id)) {
+ kfree(dl_trap);
+ mlx5_core_err(dev, "Devlink trap: Trap 0x%x already found", trap->id);
+ return -EEXIST;
+ }
+
+ list_add_tail(&dl_trap->list, &dev->priv.traps);
+ return 0;
+}
+
+static void mlx5_devlink_trap_fini(struct devlink *devlink, const struct devlink_trap *trap,
+ void *trap_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_devlink_trap *dl_trap;
+
+ dl_trap = mlx5_find_trap_by_id(dev, trap->id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Missing trap id 0x%x", trap->id);
+ return;
+ }
+ list_del(&dl_trap->list);
+ kfree(dl_trap);
+}
+
+static int mlx5_devlink_trap_action_set(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ enum devlink_trap_action action,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ enum devlink_trap_action action_orig;
+ struct mlx5_devlink_trap *dl_trap;
+ int err = 0;
+
+ if (is_mdev_switchdev_mode(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode");
+ return -EOPNOTSUPP;
+ }
+
+ dl_trap = mlx5_find_trap_by_id(dev, trap->id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (action == dl_trap->trap.action)
+ goto out;
+
+ action_orig = dl_trap->trap.action;
+ dl_trap->trap.action = action;
+ err = mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_TYPE_TRAP,
+ &dl_trap->trap);
+ if (err)
+ dl_trap->trap.action = action_orig;
+out:
+ return err;
+}
+
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_ESWITCH
+ .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+ .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+ .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+ .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
+ .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
+ .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
+ .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get,
+ .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set,
+ .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
+ .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
+ .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set,
+ .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
+ .rate_node_new = mlx5_esw_devlink_rate_node_new,
+ .rate_node_del = mlx5_esw_devlink_rate_node_del,
+ .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
+#endif
+#ifdef CONFIG_MLX5_SF_MANAGER
+ .port_new = mlx5_devlink_sf_port_new,
+ .port_del = mlx5_devlink_sf_port_del,
+ .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
+ .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
+#endif
+ .flash_update = mlx5_devlink_flash_update,
+ .info_get = mlx5_devlink_info_get,
+ .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+ BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
+ .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET),
+ .reload_down = mlx5_devlink_reload_down,
+ .reload_up = mlx5_devlink_reload_up,
+ .trap_init = mlx5_devlink_trap_init,
+ .trap_fini = mlx5_devlink_trap_fini,
+ .trap_action_set = mlx5_devlink_trap_action_set,
+};
+
+void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb,
+ struct devlink_port *dl_port)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_devlink_trap *dl_trap;
+
+ dl_trap = mlx5_find_trap_by_id(dev, trap_id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Report on invalid trap id 0x%x", trap_id);
+ return;
+ }
+
+ if (dl_trap->trap.action != DEVLINK_TRAP_ACTION_TRAP) {
+ mlx5_core_dbg(dev, "Devlink trap: Trap id %d has action %d", trap_id,
+ dl_trap->trap.action);
+ return;
+ }
+ devlink_trap_report(devlink, skb, dl_trap->item, dl_port, NULL);
+}
+
+int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev)
+{
+ struct mlx5_devlink_trap *dl_trap;
+ int count = 0;
+
+ list_for_each_entry(dl_trap, &dev->priv.traps, list)
+ if (dl_trap->trap.action == DEVLINK_TRAP_ACTION_TRAP)
+ count++;
+
+ return count;
+}
+
+int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
+ enum devlink_trap_action *action)
+{
+ struct mlx5_devlink_trap *dl_trap;
+
+ dl_trap = mlx5_find_trap_by_id(dev, trap_id);
+ if (!dl_trap) {
+ mlx5_core_err(dev, "Devlink trap: Get action on invalid trap id 0x%x",
+ trap_id);
+ return -EINVAL;
+ }
+
+ *action = dl_trap->trap.action;
+ return 0;
+}
+
+struct devlink *mlx5_devlink_alloc(struct device *dev)
+{
+ return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev),
+ dev);
+}
+
+void mlx5_devlink_free(struct devlink *devlink)
+{
+ devlink_free(devlink);
+}
+
+static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ char *value = val.vstr;
+ int err = 0;
+
+ if (!strcmp(value, "dmfs")) {
+ return 0;
+ } else if (!strcmp(value, "smfs")) {
+ u8 eswitch_mode;
+ bool smfs_cap;
+
+ eswitch_mode = mlx5_eswitch_mode(dev);
+ smfs_cap = mlx5_fs_dr_is_supported(dev);
+
+ if (!smfs_cap) {
+ err = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_MOD(extack,
+					   "Software managed steering is not supported by the current device");
+		} else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+					   "Software managed steering is not supported when eswitch offloads are enabled.");
+ err = -EOPNOTSUPP;
+ }
+ } else {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Bad parameter: supported values are [\"dmfs\", \"smfs\"]");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ enum mlx5_flow_steering_mode mode;
+
+ if (!strcmp(ctx->val.vstr, "smfs"))
+ mode = MLX5_FLOW_STEERING_MODE_SMFS;
+ else
+ mode = MLX5_FLOW_STEERING_MODE_DMFS;
+ dev->priv.steering->mode = mode;
+
+ return 0;
+}
+
+static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS)
+ strcpy(ctx->val.vstr, "smfs");
+ else
+ strcpy(ctx->val.vstr, "dmfs");
+ return 0;
+}
+
+static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ bool new_state = val.vbool;
+
+ if (new_state && !MLX5_CAP_GEN(dev, roce) &&
+ !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) {
+ NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
+ return -EOPNOTSUPP;
+ }
+ if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ int group_num = val.vu32;
+
+ if (group_num < 1 || group_num > 1024) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported group number, supported range is 1-1024");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool);
+}
+
+static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch);
+ return 0;
+}
+
+static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 esw_mode;
+
+ if (!MLX5_ESWITCH_MANAGER(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported");
+ return -EOPNOTSUPP;
+ }
+ esw_mode = mlx5_eswitch_mode(dev);
+ if (esw_mode == MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+				   "E-Switch must be either disabled or in non-switchdev mode");
+ return -EBUSY;
+ }
+ return 0;
+}
+
+#endif
+
+static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ mlx5_fw_reset_enable_remote_dev_reset_set(dev, ctx->val.vbool);
+ return 0;
+}
+
+static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ ctx->val.vbool = mlx5_fw_reset_enable_remote_dev_reset_get(dev);
+ return 0;
+}
+
+static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL;
+}
+
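+/* Runtime/driverinit parameters exposed through devlink; from userspace they
+ * are typically driven with the iproute2 devlink tool, e.g. (device name
+ * illustrative):
+ *   devlink dev param set pci/0000:08:00.0 name flow_steering_mode \
+ *           value smfs cmode runtime
+ */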
+static const struct devlink_param mlx5_devlink_params[] = {
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
+ "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set,
+ mlx5_devlink_fs_mode_validate),
+ DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_enable_roce_validate),
+#ifdef CONFIG_MLX5_ESWITCH
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ "fdb_large_groups", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL,
+ mlx5_devlink_large_group_num_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
+ "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_esw_port_metadata_get,
+ mlx5_devlink_esw_port_metadata_set,
+ mlx5_devlink_esw_port_metadata_validate),
+#endif
+ DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_enable_remote_dev_reset_get,
+ mlx5_devlink_enable_remote_dev_reset_set, NULL),
+ DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
+ DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_eq_depth_validate),
+};
+
+static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+
+ value.vbool = MLX5_CAP_GEN(dev, roce);
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ value);
+
+#ifdef CONFIG_MLX5_ESWITCH
+ value.vu32 = ESW_OFFLOADS_DEFAULT_NUM_GROUPS;
+ devlink_param_driverinit_value_set(devlink,
+ MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ value);
+#endif
+
+ value.vu32 = MLX5_COMP_EQ_SIZE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ value);
+
+ value.vu32 = MLX5_NUM_ASYNC_EQE;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ value);
+}
+
+static const struct devlink_param enable_eth_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL);
+
+static int mlx5_devlink_eth_param_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ int err;
+
+ if (!mlx5_eth_supported(dev))
+ return 0;
+
+ err = devlink_param_register(devlink, &enable_eth_param);
+ if (err)
+ return err;
+
+ value.vbool = true;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ value);
+ return 0;
+}
+
+static void mlx5_devlink_eth_param_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!mlx5_eth_supported(dev))
+ return;
+
+ devlink_param_unregister(devlink, &enable_eth_param);
+}
+
+static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ bool new_state = val.vbool;
+
+ if (new_state && !mlx5_rdma_supported(dev))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static const struct devlink_param enable_rdma_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_enable_rdma_validate);
+
+static int mlx5_devlink_rdma_param_register(struct devlink *devlink)
+{
+ union devlink_param_value value;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return 0;
+
+ err = devlink_param_register(devlink, &enable_rdma_param);
+ if (err)
+ return err;
+
+ value.vbool = true;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ value);
+ return 0;
+}
+
+static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return;
+
+ devlink_param_unregister(devlink, &enable_rdma_param);
+}
+
+static const struct devlink_param enable_vnet_param =
+ DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL);
+
+static int mlx5_devlink_vnet_param_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ int err;
+
+ if (!mlx5_vnet_supported(dev))
+ return 0;
+
+ err = devlink_param_register(devlink, &enable_vnet_param);
+ if (err)
+ return err;
+
+ value.vbool = true;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ value);
+ return 0;
+}
+
+static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!mlx5_vnet_supported(dev))
+ return;
+
+ devlink_param_unregister(devlink, &enable_vnet_param);
+}
+
+static int mlx5_devlink_auxdev_params_register(struct devlink *devlink)
+{
+ int err;
+
+ err = mlx5_devlink_eth_param_register(devlink);
+ if (err)
+ return err;
+
+ err = mlx5_devlink_rdma_param_register(devlink);
+ if (err)
+ goto rdma_err;
+
+ err = mlx5_devlink_vnet_param_register(devlink);
+ if (err)
+ goto vnet_err;
+ return 0;
+
+vnet_err:
+ mlx5_devlink_rdma_param_unregister(devlink);
+rdma_err:
+ mlx5_devlink_eth_param_unregister(devlink);
+ return err;
+}
+
+static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink)
+{
+ mlx5_devlink_vnet_param_unregister(devlink);
+ mlx5_devlink_rdma_param_unregister(devlink);
+ mlx5_devlink_eth_param_unregister(devlink);
+}
+
+static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (val.vu32 == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0");
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(val.vu32)) {
+ NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs");
+ return -EINVAL;
+ }
+
+ if (ilog2(val.vu32) >
+ MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) {
+ NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct devlink_param max_uc_list_param =
+ DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx5_devlink_max_uc_list_validate);
+
+static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ int err;
+
+ if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported))
+ return 0;
+
+ err = devlink_param_register(devlink, &max_uc_list_param);
+ if (err)
+ return err;
+
+ value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list);
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ value);
+ return 0;
+}
+
+static void
+mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported))
+ return;
+
+ devlink_param_unregister(devlink, &max_uc_list_param);
+}
+
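+/* Generic L2 drop traps registered with devlink; packets are reported to
+ * devlink (see mlx5_devlink_trap_report()) only while the trap action is set
+ * to "trap" rather than the default "drop".
+ */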
+#define MLX5_TRAP_DROP(_id, _group_id) \
+ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \
+ DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \
+ DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT)
+
+static const struct devlink_trap mlx5_traps_arr[] = {
+ MLX5_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
+ MLX5_TRAP_DROP(DMAC_FILTER, L2_DROPS),
+};
+
+static const struct devlink_trap_group mlx5_trap_groups_arr[] = {
+ DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0),
+};
+
+static int mlx5_devlink_traps_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *core_dev = devlink_priv(devlink);
+ int err;
+
+ err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
+ if (err)
+ return err;
+
+ err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr),
+ &core_dev->priv);
+ if (err)
+ goto err_trap_group;
+ return 0;
+
+err_trap_group:
+ devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
+ return err;
+}
+
+static void mlx5_devlink_traps_unregister(struct devlink *devlink)
+{
+ devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr));
+ devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+ ARRAY_SIZE(mlx5_trap_groups_arr));
+}
+
+int mlx5_devlink_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
+
+ err = devlink_params_register(devlink, mlx5_devlink_params,
+ ARRAY_SIZE(mlx5_devlink_params));
+ if (err)
+ return err;
+
+ mlx5_devlink_set_params_init_values(devlink);
+
+ err = mlx5_devlink_auxdev_params_register(devlink);
+ if (err)
+ goto auxdev_reg_err;
+
+ err = mlx5_devlink_max_uc_list_param_register(devlink);
+ if (err)
+ goto max_uc_list_err;
+
+ err = mlx5_devlink_traps_register(devlink);
+ if (err)
+ goto traps_reg_err;
+
+ if (!mlx5_core_is_mp_slave(dev))
+ devlink_set_features(devlink, DEVLINK_F_RELOAD);
+
+ return 0;
+
+traps_reg_err:
+ mlx5_devlink_max_uc_list_param_unregister(devlink);
+max_uc_list_err:
+ mlx5_devlink_auxdev_params_unregister(devlink);
+auxdev_reg_err:
+ devlink_params_unregister(devlink, mlx5_devlink_params,
+ ARRAY_SIZE(mlx5_devlink_params));
+ return err;
+}
+
+void mlx5_devlink_unregister(struct devlink *devlink)
+{
+ mlx5_devlink_traps_unregister(devlink);
+ mlx5_devlink_max_uc_list_param_unregister(devlink);
+ mlx5_devlink_auxdev_params_unregister(devlink);
+ devlink_params_unregister(devlink, mlx5_devlink_params,
+ ARRAY_SIZE(mlx5_devlink_params));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
new file mode 100644
index 000000000..30bf48827
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef __MLX5_DEVLINK_H__
+#define __MLX5_DEVLINK_H__
+
+#include <net/devlink.h>
+
+enum mlx5_devlink_param_id {
+ MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
+ MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
+};
+
+struct mlx5_trap_ctx {
+ int id;
+ int action;
+};
+
+struct mlx5_devlink_trap {
+ struct mlx5_trap_ctx trap;
+ void *item;
+ struct list_head list;
+};
+
+struct mlx5_core_dev;
+void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb,
+ struct devlink_port *dl_port);
+int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
+int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
+ enum devlink_trap_action *action);
+
+struct devlink *mlx5_devlink_alloc(struct device *dev);
+void mlx5_devlink_free(struct devlink *devlink);
+int mlx5_devlink_register(struct devlink *devlink);
+void mlx5_devlink_unregister(struct devlink *devlink);
+
+#endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h
new file mode 100644
index 000000000..406ebe174
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_CMD_TP_H_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+TRACE_EVENT(mlx5_cmd,
+ TP_PROTO(const char *command_str, u16 opcode, u16 op_mod,
+ const char *status_str, u8 status, u32 syndrome, int err),
+ TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err),
+ TP_STRUCT__entry(__string(command_str, command_str)
+ __field(u16, opcode)
+ __field(u16, op_mod)
+ __string(status_str, status_str)
+ __field(u8, status)
+ __field(u32, syndrome)
+ __field(int, err)
+ ),
+ TP_fast_assign(__assign_str(command_str, command_str);
+ __entry->opcode = opcode;
+ __entry->op_mod = op_mod;
+ __assign_str(status_str, status_str);
+ __entry->status = status;
+ __entry->syndrome = syndrome;
+ __entry->err = err;
+ ),
+ TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)",
+ __get_str(command_str), __entry->opcode, __entry->op_mod,
+ __get_str(status_str), __entry->status, __entry->syndrome,
+ __entry->err)
+);
+
+#endif /* _MLX5_CMD_TP_H_ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE cmd_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
new file mode 100644
index 000000000..28d02749d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/pci_vsc.h"
+#include "lib/mlx5.h"
+
+#define BAD_ACCESS 0xBADACCE5
+#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7
+
+static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev)
+{
+ return !!dev->priv.health.crdump_size;
+}
+
+static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+ u32 crdump_size = dev->priv.health.crdump_size;
+ int i, ret;
+
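+ /* Pre-fill the dump with a recognizable sentinel so dwords that were never read stand out */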
+ for (i = 0; i < (crdump_size / 4); i++)
+ cr_data[i] = BAD_ACCESS;
+
+ ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size);
+ if (ret <= 0) {
+ if (ret == 0)
+ return -EIO;
+ return ret;
+ }
+
+ if (crdump_size != ret) {
+ mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
+ ret, crdump_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+ int ret;
+
+ if (!mlx5_crdump_enabled(dev))
+ return -ENODEV;
+
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret) {
+ mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n",
+ ret);
+ return ret;
+ }
+ /* Verify no other PF is running cr-dump or sw reset */
+ ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET,
+ MLX5_VSC_LOCK);
+ if (ret) {
+ mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+ goto unlock_gw;
+ }
+
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
+ if (ret)
+ goto unlock_sem;
+
+ ret = mlx5_crdump_fill(dev, cr_data);
+
+unlock_sem:
+ mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK);
+unlock_gw:
+ mlx5_vsc_gw_unlock(dev);
+ return ret;
+}
+
+int mlx5_crdump_enable(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ u32 space_size;
+ int ret;
+
+ if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) ||
+ mlx5_crdump_enabled(dev))
+ return 0;
+
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret)
+ return ret;
+
+ /* Check if space is supported and get space size */
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
+ &space_size);
+ if (ret) {
+ /* Unlock and mask error since space is not supported */
+ mlx5_vsc_gw_unlock(dev);
+ return 0;
+ }
+
+ if (!space_size) {
+ mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
+ mlx5_vsc_gw_unlock(dev);
+ return -EINVAL;
+ }
+
+ ret = mlx5_vsc_gw_unlock(dev);
+ if (ret)
+ return ret;
+
+ priv->health.crdump_size = space_size;
+ return 0;
+}
+
+void mlx5_crdump_disable(struct mlx5_core_dev *dev)
+{
+ dev->priv.health.crdump_size = 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
new file mode 100644
index 000000000..f15718db5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_EN_REP_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_EN_REP_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include "en_rep.h"
+
+TRACE_EVENT(mlx5e_rep_neigh_update,
+ TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha,
+ bool neigh_connected),
+ TP_ARGS(nhe, ha, neigh_connected),
+ TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
+ __array(u8, ha, ETH_ALEN)
+ __array(u8, v4, 4)
+ __array(u8, v6, 16)
+ __field(bool, neigh_connected)
+ ),
+ TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh;
+ struct in6_addr *pin6;
+ __be32 *p32;
+
+ __assign_str(devname, nhe->neigh_dev->name);
+ __entry->neigh_connected = neigh_connected;
+ memcpy(__entry->ha, ha, ETH_ALEN);
+
+ p32 = (__be32 *)__entry->v4;
+ pin6 = (struct in6_addr *)__entry->v6;
+ if (mn->family == AF_INET) {
+ *p32 = mn->dst_ip.v4;
+ ipv6_addr_set_v4mapped(*p32, pin6);
+ } else if (mn->family == AF_INET6) {
+ *pin6 = mn->dst_ip.v6;
+ }
+ ),
+ TP_printk("netdev: %s MAC: %pM IPv4: %pI4 IPv6: %pI6c neigh_connected=%d\n",
+ __get_str(devname), __entry->ha,
+ __entry->v4, __entry->v6, __entry->neigh_connected
+ )
+);
+
+#endif /* _MLX5_EN_REP_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE en_rep_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
new file mode 100644
index 000000000..c5dc6c50f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#define CREATE_TRACE_POINTS
+#include "en_tc_tracepoint.h"
+
+void put_ids_to_array(int *ids,
+ const struct flow_action_entry *entries,
+ unsigned int num)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++)
+ ids[i] = entries[i].id;
+}
+
+#define NAME_SIZE 16
+
+static const char FLOWACT2STR[NUM_FLOW_ACTIONS][NAME_SIZE] = {
+ [FLOW_ACTION_ACCEPT] = "ACCEPT",
+ [FLOW_ACTION_DROP] = "DROP",
+ [FLOW_ACTION_TRAP] = "TRAP",
+ [FLOW_ACTION_GOTO] = "GOTO",
+ [FLOW_ACTION_REDIRECT] = "REDIRECT",
+ [FLOW_ACTION_MIRRED] = "MIRRED",
+ [FLOW_ACTION_VLAN_PUSH] = "VLAN_PUSH",
+ [FLOW_ACTION_VLAN_POP] = "VLAN_POP",
+ [FLOW_ACTION_VLAN_MANGLE] = "VLAN_MANGLE",
+ [FLOW_ACTION_TUNNEL_ENCAP] = "TUNNEL_ENCAP",
+ [FLOW_ACTION_TUNNEL_DECAP] = "TUNNEL_DECAP",
+ [FLOW_ACTION_MANGLE] = "MANGLE",
+ [FLOW_ACTION_ADD] = "ADD",
+ [FLOW_ACTION_CSUM] = "CSUM",
+ [FLOW_ACTION_MARK] = "MARK",
+ [FLOW_ACTION_WAKE] = "WAKE",
+ [FLOW_ACTION_QUEUE] = "QUEUE",
+ [FLOW_ACTION_SAMPLE] = "SAMPLE",
+ [FLOW_ACTION_POLICE] = "POLICE",
+ [FLOW_ACTION_CT] = "CT",
+};
+
+const char *parse_action(struct trace_seq *p,
+ int *ids,
+ unsigned int num)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ unsigned int i;
+
+ for (i = 0; i < num; i++) {
+ if (ids[i] < NUM_FLOW_ACTIONS)
+ trace_seq_printf(p, "%s ", FLOWACT2STR[ids[i]]);
+ else
+ trace_seq_printf(p, "UNKNOWN ");
+ }
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
new file mode 100644
index 000000000..ac52ef37f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_TC_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_TC_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include <net/flow_offload.h>
+#include "en_rep.h"
+
+#define __parse_action(ids, num) parse_action(p, ids, num)
+
+void put_ids_to_array(int *ids,
+ const struct flow_action_entry *entries,
+ unsigned int num);
+
+const char *parse_action(struct trace_seq *p,
+ int *ids,
+ unsigned int num);
+
+DECLARE_EVENT_CLASS(mlx5e_flower_template,
+ TP_PROTO(const struct flow_cls_offload *f),
+ TP_ARGS(f),
+ TP_STRUCT__entry(__field(void *, cookie)
+ __field(unsigned int, num)
+ __dynamic_array(int, ids, f->rule ?
+ f->rule->action.num_entries : 0)
+ ),
+ TP_fast_assign(__entry->cookie = (void *)f->cookie;
+ __entry->num = (f->rule ?
+ f->rule->action.num_entries : 0);
+ if (__entry->num)
+ put_ids_to_array(__get_dynamic_array(ids),
+ f->rule->action.entries,
+ f->rule->action.num_entries);
+ ),
+ TP_printk("cookie=%p actions= %s\n",
+ __entry->cookie, __entry->num ?
+ __parse_action(__get_dynamic_array(ids),
+ __entry->num) : "NULL"
+ )
+);
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_configure_flower,
+ TP_PROTO(const struct flow_cls_offload *f),
+ TP_ARGS(f)
+ );
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_delete_flower,
+ TP_PROTO(const struct flow_cls_offload *f),
+ TP_ARGS(f)
+ );
+
+TRACE_EVENT(mlx5e_stats_flower,
+ TP_PROTO(const struct flow_cls_offload *f),
+ TP_ARGS(f),
+ TP_STRUCT__entry(__field(void *, cookie)
+ __field(u64, bytes)
+ __field(u64, packets)
+ __field(u64, lastused)
+ ),
+ TP_fast_assign(__entry->cookie = (void *)f->cookie;
+ __entry->bytes = f->stats.bytes;
+ __entry->packets = f->stats.pkts;
+ __entry->lastused = f->stats.lastused;
+ ),
+ TP_printk("cookie=%p bytes=%llu packets=%llu lastused=%llu\n",
+ __entry->cookie, __entry->bytes,
+ __entry->packets, __entry->lastused
+ )
+);
+
+TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
+ TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used),
+ TP_ARGS(nhe, neigh_used),
+ TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
+ __array(u8, v4, 4)
+ __array(u8, v6, 16)
+ __field(bool, neigh_used)
+ ),
+ TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh;
+ struct in6_addr *pin6;
+ __be32 *p32;
+
+ __assign_str(devname, nhe->neigh_dev->name);
+ __entry->neigh_used = neigh_used;
+
+ p32 = (__be32 *)__entry->v4;
+ pin6 = (struct in6_addr *)__entry->v6;
+ if (mn->family == AF_INET) {
+ *p32 = mn->dst_ip.v4;
+ ipv6_addr_set_v4mapped(*p32, pin6);
+ } else if (mn->family == AF_INET6) {
+ *pin6 = mn->dst_ip.v6;
+ }
+ ),
+ TP_printk("netdev: %s IPv4: %pI4 IPv6: %pI6c neigh_used=%d\n",
+ __get_str(devname), __entry->v4, __entry->v6,
+ __entry->neigh_used
+ )
+);
+
+#endif /* _MLX5_TC_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE en_tc_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
new file mode 100644
index 000000000..c5bb79a4f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include "fs_tracepoint.h"
+#include <linux/stringify.h>
+
+#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name
+#define MASK_VAL(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET(spec, mask, fld),\
+ .v = MLX5_GET(spec, val, fld)}
+#define MASK_VAL_BE(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET_BE(type, spec, mask, fld),\
+ .v = MLX5_GET_BE(type, spec, val, fld)}
+#define GET_MASKED_VAL(name) (name.m & name.v)
+
+#define GET_MASK_VAL(name, type, mask, val, fld) \
+ (name.m = MLX5_GET(type, mask, fld), \
+ name.v = MLX5_GET(type, val, fld), \
+ name.m & name.v)
+#define PRINT_MASKED_VAL(name, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \
+ }
+#define PRINT_MASKED_VALP(name, cast, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", \
+ (cast)&name.v);\
+ }
+
+static void print_lyr_2_4_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_L2(type, name, fld) \
+ MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ DECLARE_MASK_VAL(u64, smac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)};
+ DECLARE_MASK_VAL(u64, dmac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)};
+ MASK_VAL_L2(u16, ethertype, ethertype);
+ MASK_VAL_L2(u8, ip_version, ip_version);
+
+ PRINT_MASKED_VALP(smac, u8 *, p, "%pM");
+ PRINT_MASKED_VALP(dmac, u8 *, p, "%pM");
+ PRINT_MASKED_VAL(ethertype, p, "%04x");
+
+ if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IP) ||
+ (ip_version.m == 0xf && ip_version.v == 4)) {
+#define MASK_VAL_L2_BE(type, name, fld) \
+ MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ MASK_VAL_L2_BE(u32, src_ipv4,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, dst_ipv4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
+ "%pI4");
+ PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
+ "%pI4");
+ } else if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IPV6) ||
+ (ip_version.m == 0xf && ip_version.v == 6)) {
+ static const struct in6_addr full_ones = {
+ .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff)},
+ };
+ DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
+ DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
+
+ memcpy(src_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.m));
+ memcpy(dst_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.m));
+ memcpy(src_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.v));
+ memcpy(dst_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.v));
+
+ if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "src_ipv6=%pI6 ",
+ &src_ipv6.v);
+ if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "dst_ipv6=%pI6 ",
+ &dst_ipv6.v);
+ }
+
+#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\
+ MASK_VAL_L2(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+
+ PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x");
+ PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x");
+ PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x");
+ PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x");
+ PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d");
+}
+
+static void print_misc_parameters_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_MISC(type, name, fld) \
+ MASK_VAL(type, fte_match_set_misc, name, mask, value, fld)
+#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\
+ MASK_VAL_MISC(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+ DECLARE_MASK_VAL(u64, gre_key) = {
+ .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo),
+ .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)};
+
+ PRINT_MASKED_VAL(gre_key, p, "%llu");
+ PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag,
+ outer_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag,
+ inner_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag,
+ outer_second_svlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag,
+ inner_second_svlan_tag, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u");
+ PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label,
+ p, "%x");
+ PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label,
+ p, "%x");
+}
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
+ trace_seq_printf(p, "[outer] ");
+ print_lyr_2_4_hdrs(p, mask_outer, value_outer);
+ }
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
+ trace_seq_printf(p, "[misc] ");
+ print_misc_parameters_hdrs(p, mask_misc, value_misc);
+ }
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
+ trace_seq_printf(p, "[inner] ");
+ print_lyr_2_4_hdrs(p, mask_inner, value_inner);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ switch (dst->type) {
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
+ trace_seq_printf(p, "uplink\n");
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ trace_seq_printf(p, "vport=%u\n", dst->vport.num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ trace_seq_printf(p, "ft=%p\n", dst->ft);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ trace_seq_printf(p, "ft_num=%u\n", dst->ft_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
+ trace_seq_printf(p, "tir=%u\n", dst->tir_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ trace_seq_printf(p, "sampler_id=%u\n", dst->sampler_id);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
+ trace_seq_printf(p, "counter_id=%u\n", counter_id);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
+ trace_seq_printf(p, "port\n");
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ trace_seq_printf(p, "none\n");
+ break;
+ }
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule);
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
new file mode 100644
index 000000000..ddf1b87f1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_FS_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include "../fs_core.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \
+ vinner, vmisc) \
+ parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\
+ vinner, vmisc)
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner);
+
+#define __parse_fs_dst(dst, counter_id) \
+ parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id)
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id);
+
+TRACE_EVENT(mlx5_fs_add_ft,
+ TP_PROTO(const struct mlx5_flow_table *ft),
+ TP_ARGS(ft),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, id)
+ __field(u32, level)
+ __field(u32, type)
+ ),
+ TP_fast_assign(
+ __entry->ft = ft;
+ __entry->id = ft->id;
+ __entry->level = ft->level;
+ __entry->type = ft->type;
+ ),
+ TP_printk("ft=%p id=%u level=%u type=%u \n",
+ __entry->ft, __entry->id, __entry->level, __entry->type)
+ );
+
+TRACE_EVENT(mlx5_fs_del_ft,
+ TP_PROTO(const struct mlx5_flow_table *ft),
+ TP_ARGS(ft),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, id)
+ ),
+ TP_fast_assign(
+ __entry->ft = ft;
+ __entry->id = ft->id;
+
+ ),
+ TP_printk("ft=%p id=%u\n",
+ __entry->ft, __entry->id)
+ );
+
+TRACE_EVENT(mlx5_fs_add_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, start_index)
+ __field(u32, end_index)
+ __field(u32, id)
+ __field(u8, mask_enable)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ fs_get_obj(__entry->ft, fg->node.parent);
+ __entry->start_index = fg->start_index;
+ __entry->end_index = fg->start_index + fg->max_ftes;
+ __entry->id = fg->id;
+ __entry->mask_enable = fg->mask.match_criteria_enable;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+
+ ),
+ TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n",
+ __entry->fg, __entry->ft, __entry->id,
+ __entry->start_index, __entry->end_index,
+ __entry->mask_enable,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, id)
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ __entry->id = fg->id;
+
+ ),
+ TP_printk("fg=%p id=%u\n",
+ __entry->fg, __entry->id)
+ );
+
+#define ACTION_FLAGS \
+ {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\
+ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\
+ {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2, "VLAN_PUSH_2"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2, "VLAN_POP_2"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
+
+TRACE_EVENT(mlx5_fs_set_fte,
+ TP_PROTO(const struct fs_fte *fte, int new_fte),
+ TP_ARGS(fte, new_fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, group_index)
+ __field(u32, index)
+ __field(u32, action)
+ __field(u32, flow_tag)
+ __field(u32, flow_source)
+ __field(u8, mask_enable)
+ __field(int, new_fte)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->new_fte = new_fte;
+ fs_get_obj(__entry->fg, fte->node.parent);
+ __entry->group_index = __entry->fg->id;
+ __entry->index = fte->index;
+ __entry->action = fte->action.action;
+ __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
+ __entry->flow_tag = fte->flow_context.flow_tag;
+ __entry->flow_source = fte->flow_context.flow_source;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+ memcpy(__entry->value_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ outer_headers),
+ sizeof(__entry->value_outer));
+ memcpy(__entry->value_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ inner_headers),
+ sizeof(__entry->value_inner));
+ memcpy(__entry->value_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ misc_parameters),
+ sizeof(__entry->value_misc));
+
+ ),
+ TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n",
+ __entry->new_fte ? "add" : "set",
+ __entry->fte, __entry->fg, __entry->index,
+ __entry->group_index, __print_flags(__entry->action, "|",
+ ACTION_FLAGS),
+ __entry->flow_tag,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->value_outer,
+ __entry->value_misc,
+ __entry->value_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fte,
+ TP_PROTO(const struct fs_fte *fte),
+ TP_ARGS(fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(u32, index)
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->index = fte->index;
+
+ ),
+ TP_printk("fte=%p index=%u\n",
+ __entry->fte, __entry->index)
+ );
+
+TRACE_EVENT(mlx5_fs_add_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ __field(u32, sw_action)
+ __field(u32, index)
+ __field(u32, counter_id)
+ __array(u8, destination, sizeof(struct mlx5_flow_destination))
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ __entry->index = __entry->fte->dests_size - 1;
+ __entry->sw_action = rule->sw_action;
+ memcpy(__entry->destination,
+ &rule->dest_attr,
+ sizeof(__entry->destination));
+ if (rule->dest_attr.type &
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ __entry->counter_id =
+ rule->dest_attr.counter_id;
+ ),
+ TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
+ __entry->rule, __entry->fte, __entry->index,
+ __print_flags(__entry->sw_action, "|", ACTION_FLAGS),
+ __parse_fs_dst(__entry->destination, __entry->counter_id))
+ );
+
+TRACE_EVENT(mlx5_fs_del_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ ),
+ TP_printk("rule=%p fte=%p\n",
+ __entry->rule, __entry->fte)
+ );
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE fs_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
new file mode 100644
index 000000000..3ba54ffa5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -0,0 +1,1154 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define CREATE_TRACE_POINTS
+#include "lib/eq.h"
+#include "fw_tracer.h"
+#include "fw_tracer_tracepoint.h"
+
+static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
+{
+ u32 *string_db_base_address_out = tracer->str_db.base_address_out;
+ u32 *string_db_size_out = tracer->str_db.size_out;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ void *mtrc_cap_sp;
+ int err, i;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CAP, 0, 0);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n",
+ err);
+ return err;
+ }
+
+ if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
+ mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
+ return -ENOTSUPP;
+ }
+
+ tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
+ tracer->str_db.first_string_trace =
+ MLX5_GET(mtrc_cap, out, first_string_trace);
+ tracer->str_db.num_string_trace =
+ MLX5_GET(mtrc_cap, out, num_string_trace);
+ tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
+ tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+ tracer->str_db.loaded = false;
+
+ for (i = 0; i < tracer->str_db.num_string_db; i++) {
+ mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
+ string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param,
+ mtrc_cap_sp,
+ string_db_base_address);
+ string_db_size_out[i] = MLX5_GET(mtrc_string_db_param,
+ mtrc_cap_sp, string_db_size);
+ }
+
+ return err;
+}
+
+static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer,
+ u32 *out, u32 out_size,
+ u8 trace_owner)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+ MLX5_SET(mtrc_cap, in, trace_owner, trace_owner);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size,
+ MLX5_REG_MTRC_CAP, 0, 1);
+}
+
+static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ int err;
+
+ err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+ MLX5_FW_TRACER_ACQUIRE_OWNERSHIP);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n",
+ err);
+ return err;
+ }
+
+ tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+ if (!tracer->owner)
+ return -EBUSY;
+
+ return 0;
+}
+
+static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
+{
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+ mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+ MLX5_FW_TRACER_RELEASE_OWNERSHIP);
+ tracer->owner = false;
+}
+
+static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct device *ddev;
+ dma_addr_t dma;
+ void *buff;
+ gfp_t gfp;
+ int err;
+
+ tracer->buff.size = TRACE_BUFFER_SIZE_BYTE;
+
+ gfp = GFP_KERNEL | __GFP_ZERO;
+ buff = (void *)__get_free_pages(gfp,
+ get_order(tracer->buff.size));
+ if (!buff) {
+ err = -ENOMEM;
+ mlx5_core_warn(dev, "FWTracer: Failed to allocate pages, %d\n", err);
+ return err;
+ }
+ tracer->buff.log_buf = buff;
+
+ ddev = mlx5_core_dma_dev(dev);
+ dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
+ dma_mapping_error(ddev, dma));
+ err = -ENOMEM;
+ goto free_pages;
+ }
+ tracer->buff.dma = dma;
+
+ return 0;
+
+free_pages:
+ free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+
+ return err;
+}
+
+static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct device *ddev;
+
+ if (!tracer->buff.log_buf)
+ return;
+
+ ddev = mlx5_core_dma_dev(dev);
+ dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
+ free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+}
+
+static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ int err, inlen, i;
+ __be64 *mtt;
+ void *mkc;
+ u32 *in;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+ sizeof(*mtt) * round_up(TRACER_BUFFER_PAGE_NUM, 2);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
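+ /* One MTT entry per page of the trace buffer */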
+ for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++)
+ mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE);
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, pd, tracer->buff.pdn);
+ MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+ MLX5_SET64(mkc, mkc, start_addr, tracer->buff.dma);
+ MLX5_SET64(mkc, mkc, len, tracer->buff.size);
+ err = mlx5_core_create_mkey(dev, &tracer->buff.mkey, in, inlen);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to create mkey, %d\n", err);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5_fw_tracer_free_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ u32 num_string_db = tracer->str_db.num_string_db;
+ int i;
+
+ for (i = 0; i < num_string_db; i++) {
+ kfree(tracer->str_db.buffer[i]);
+ tracer->str_db.buffer[i] = NULL;
+ }
+}
+
+static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ u32 *string_db_size_out = tracer->str_db.size_out;
+ u32 num_string_db = tracer->str_db.num_string_db;
+ int i;
+
+ for (i = 0; i < num_string_db; i++) {
+ tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL);
+ if (!tracer->str_db.buffer[i])
+ goto free_strings_db;
+ }
+
+ return 0;
+
+free_strings_db:
+ mlx5_fw_tracer_free_strings_db(tracer);
+ return -ENOMEM;
+}
+
+static void
+mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+ tracer->st_arr.saved_traces_index = 0;
+ mutex_init(&tracer->st_arr.lock);
+}
+
+static void
+mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+ mutex_destroy(&tracer->st_arr.lock);
+}
+
+static void mlx5_tracer_read_strings_db(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
+ read_fw_strings_work);
+ u32 num_of_reads, num_string_db = tracer->str_db.num_string_db;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ u32 leftovers, offset;
+ int err = 0, i, j;
+ u32 *out, outlen;
+ void *out_value;
+
+ outlen = MLX5_ST_SZ_BYTES(mtrc_stdb) + STRINGS_DB_READ_SIZE_BYTES;
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < num_string_db; i++) {
+ offset = 0;
+ MLX5_SET(mtrc_stdb, in, string_db_index, i);
+ num_of_reads = tracer->str_db.size_out[i] /
+ STRINGS_DB_READ_SIZE_BYTES;
+ leftovers = (tracer->str_db.size_out[i] %
+ STRINGS_DB_READ_SIZE_BYTES) /
+ STRINGS_DB_LEFTOVER_SIZE_BYTES;
+
+ MLX5_SET(mtrc_stdb, in, read_size, STRINGS_DB_READ_SIZE_BYTES);
+ for (j = 0; j < num_of_reads; j++) {
+ MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ outlen, MLX5_REG_MTRC_STDB,
+ 0, 1);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+ err);
+ goto out_free;
+ }
+
+ out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+ memcpy(tracer->str_db.buffer[i] + offset, out_value,
+ STRINGS_DB_READ_SIZE_BYTES);
+ offset += STRINGS_DB_READ_SIZE_BYTES;
+ }
+
+ /* The strings database size is aligned to 64 bytes; read the leftovers */
+ MLX5_SET(mtrc_stdb, in, read_size,
+ STRINGS_DB_LEFTOVER_SIZE_BYTES);
+ for (j = 0; j < leftovers; j++) {
+ MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ outlen, MLX5_REG_MTRC_STDB,
+ 0, 1);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+ err);
+ goto out_free;
+ }
+
+ out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+ memcpy(tracer->str_db.buffer[i] + offset, out_value,
+ STRINGS_DB_LEFTOVER_SIZE_BYTES);
+ offset += STRINGS_DB_LEFTOVER_SIZE_BYTES;
+ }
+ }
+
+ tracer->str_db.loaded = true;
+
+out_free:
+ kfree(out);
+out:
+ return;
+}
+
+static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_ctrl, in, arm_event, 1);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CTRL, 0, 1);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err);
+}
+
+static const char *VAL_PARM = "%llx";
+static const char *REPLACE_64_VAL_PARM = "%x%x";
+static const char *PARAM_CHAR = "%";
+
+static int mlx5_tracer_message_hash(u32 message_id)
+{
+ return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1);
+}
+
+static struct tracer_string_format *mlx5_tracer_message_insert(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct hlist_head *head =
+ &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+ struct tracer_string_format *cur_string;
+
+ cur_string = kzalloc(sizeof(*cur_string), GFP_KERNEL);
+ if (!cur_string)
+ return NULL;
+
+ hlist_add_head(&cur_string->hlist, head);
+
+ return cur_string;
+}
+
+static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_string_format *cur_string;
+ u32 str_ptr, offset;
+ int i;
+
+ str_ptr = tracer_event->string_event.string_param;
+
+ for (i = 0; i < tracer->str_db.num_string_db; i++) {
+ if (str_ptr > tracer->str_db.base_address_out[i] &&
+ str_ptr < tracer->str_db.base_address_out[i] +
+ tracer->str_db.size_out[i]) {
+ offset = str_ptr - tracer->str_db.base_address_out[i];
+ /* add it to the hash */
+ cur_string = mlx5_tracer_message_insert(tracer, tracer_event);
+ if (!cur_string)
+ return NULL;
+ cur_string->string = (char *)(tracer->str_db.buffer[i] +
+ offset);
+ return cur_string;
+ }
+ }
+
+ return NULL;
+}
+
+static void mlx5_tracer_clean_message(struct tracer_string_format *str_frmt)
+{
+ hlist_del(&str_frmt->hlist);
+ kfree(str_frmt);
+}
+
+static int mlx5_tracer_get_num_of_params(char *str)
+{
+ char *substr, *pstr = str;
+ int num_of_params = 0;
+
+ /* Replace %llx with %x%x: each trace parameter is 32 bits wide, so a 64-bit value is printed as two 32-bit halves */
+ substr = strstr(pstr, VAL_PARM);
+ while (substr) {
+ memcpy(substr, REPLACE_64_VAL_PARM, 4);
+ pstr = substr;
+ substr = strstr(pstr, VAL_PARM);
+ }
+
+ /* count all the % characters */
+ substr = strstr(str, PARAM_CHAR);
+ while (substr) {
+ num_of_params += 1;
+ str = substr + 1;
+ substr = strstr(str, PARAM_CHAR);
+ }
+
+ return num_of_params;
+}
+
+static struct tracer_string_format *mlx5_tracer_message_find(struct hlist_head *head,
+ u8 event_id, u32 tmsn)
+{
+ struct tracer_string_format *message;
+
+ hlist_for_each_entry(message, head, hlist)
+ if (message->event_id == event_id && message->tmsn == tmsn)
+ return message;
+
+ return NULL;
+}
+
+static struct tracer_string_format *mlx5_tracer_message_get(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct hlist_head *head =
+ &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+
+ return mlx5_tracer_message_find(head, tracer_event->event_id, tracer_event->string_event.tmsn);
+}
+
+static void poll_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event, u64 *trace)
+{
+ u32 timestamp_low, timestamp_mid, timestamp_high, urts;
+
+ tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id);
+ tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost);
+
+ switch (tracer_event->event_id) {
+ case TRACER_EVENT_TYPE_TIMESTAMP:
+ tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP;
+ urts = MLX5_GET(tracer_timestamp_event, trace, urts);
+ if (tracer->trc_ver == 0)
+ tracer_event->timestamp_event.unreliable = !!(urts >> 2);
+ else
+ tracer_event->timestamp_event.unreliable = !!(urts & 1);
+
+ timestamp_low = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp7_0);
+ timestamp_mid = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp39_8);
+ timestamp_high = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp52_40);
+
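+ /* Reassemble the 53-bit timestamp from its three hardware fields */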
+ tracer_event->timestamp_event.timestamp =
+ ((u64)timestamp_high << 40) |
+ ((u64)timestamp_mid << 8) |
+ (u64)timestamp_low;
+ break;
+ default:
+ if (tracer_event->event_id >= tracer->str_db.first_string_trace &&
+ tracer_event->event_id <= tracer->str_db.first_string_trace +
+ tracer->str_db.num_string_trace) {
+ tracer_event->type = TRACER_EVENT_TYPE_STRING;
+ tracer_event->string_event.timestamp =
+ MLX5_GET(tracer_string_event, trace, timestamp);
+ tracer_event->string_event.string_param =
+ MLX5_GET(tracer_string_event, trace, string_param);
+ tracer_event->string_event.tmsn =
+ MLX5_GET(tracer_string_event, trace, tmsn);
+ tracer_event->string_event.tdsn =
+ MLX5_GET(tracer_string_event, trace, tdsn);
+ } else {
+ tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED;
+ }
+ break;
+ }
+}
+
+static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event)
+{
+ struct tracer_event tracer_event;
+ u8 event_id;
+
+ event_id = MLX5_GET(tracer_event, ts_event, event_id);
+
+ if (event_id == TRACER_EVENT_TYPE_TIMESTAMP)
+ poll_trace(tracer, &tracer_event, ts_event);
+ else
+ tracer_event.timestamp_event.timestamp = 0;
+
+ return tracer_event.timestamp_event.timestamp;
+}
+
+static void mlx5_fw_tracer_clean_print_hash(struct mlx5_fw_tracer *tracer)
+{
+ struct tracer_string_format *str_frmt;
+ struct hlist_node *n;
+ int i;
+
+ for (i = 0; i < MESSAGE_HASH_SIZE; i++) {
+ hlist_for_each_entry_safe(str_frmt, n, &tracer->hash[i], hlist)
+ mlx5_tracer_clean_message(str_frmt);
+ }
+}
+
+static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
+{
+ struct tracer_string_format *str_frmt, *tmp_str;
+
+ list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list,
+ list)
+ list_del(&str_frmt->list);
+}
+
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
+ u64 timestamp, bool lost,
+ u8 event_id, char *msg)
+{
+ struct mlx5_fw_trace_data *trace_data;
+
+ mutex_lock(&tracer->st_arr.lock);
+ trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index];
+ trace_data->timestamp = timestamp;
+ trace_data->lost = lost;
+ trace_data->event_id = event_id;
+ strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG);
+
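+ /* Advance the ring index; the mask-based wrap relies on SAVED_TRACES_NUM being a power of two */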
+ tracer->st_arr.saved_traces_index =
+ (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1);
+ mutex_unlock(&tracer->st_arr.lock);
+}
+
+static noinline
+void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
+ struct mlx5_core_dev *dev,
+ u64 trace_timestamp)
+{
+ char tmp[512];
+
+ snprintf(tmp, sizeof(tmp), str_frmt->string,
+ str_frmt->params[0],
+ str_frmt->params[1],
+ str_frmt->params[2],
+ str_frmt->params[3],
+ str_frmt->params[4],
+ str_frmt->params[5],
+ str_frmt->params[6]);
+
+ trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
+ str_frmt->event_id, tmp);
+
+ mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
+ str_frmt->lost, str_frmt->event_id, tmp);
+
+ /* remove it from hash */
+ mlx5_tracer_clean_message(str_frmt);
+}
+
+static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_string_format *cur_string;
+
+ if (tracer_event->string_event.tdsn == 0) {
+ cur_string = mlx5_tracer_get_string(tracer, tracer_event);
+ if (!cur_string)
+ return -1;
+
+ cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
+ cur_string->last_param_num = 0;
+ cur_string->event_id = tracer_event->event_id;
+ cur_string->tmsn = tracer_event->string_event.tmsn;
+ cur_string->timestamp = tracer_event->string_event.timestamp;
+ cur_string->lost = tracer_event->lost_event;
+ if (cur_string->num_of_params == 0) /* trace with no params */
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ } else {
+ cur_string = mlx5_tracer_message_get(tracer, tracer_event);
+ if (!cur_string) {
+ pr_debug("%s Got string event for unknown string tmsn: %d\n",
+ __func__, tracer_event->string_event.tmsn);
+ return -1;
+ }
+ cur_string->last_param_num += 1;
+ if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
+ pr_debug("%s Number of params exceeds the max (%d)\n",
+ __func__, TRACER_MAX_PARAMS);
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ return 0;
+ }
+ /* keep the new parameter */
+ cur_string->params[cur_string->last_param_num - 1] =
+ tracer_event->string_event.string_param;
+ if (cur_string->last_param_num == cur_string->num_of_params)
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ }
+
+ return 0;
+}
+
+static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_timestamp_event timestamp_event =
+ tracer_event->timestamp_event;
+ struct tracer_string_format *str_frmt, *tmp_str;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u64 trace_timestamp;
+
+ list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, list) {
+ list_del(&str_frmt->list);
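+ /* The string event only provides the low timestamp bits; take the high bits
+ * from the nearest timestamp event, stepping back one window if the low bits
+ * have already wrapped.
+ */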
+ if (str_frmt->timestamp < (timestamp_event.timestamp & MASK_6_0))
+ trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
+ (str_frmt->timestamp & MASK_6_0);
+ else
+ trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) |
+ (str_frmt->timestamp & MASK_6_0);
+
+ mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
+ }
+}
+
+static int mlx5_tracer_handle_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ if (tracer_event->type == TRACER_EVENT_TYPE_STRING) {
+ mlx5_tracer_handle_string_trace(tracer, tracer_event);
+ } else if (tracer_event->type == TRACER_EVENT_TYPE_TIMESTAMP) {
+ if (!tracer_event->timestamp_event.unreliable)
+ mlx5_tracer_handle_timestamp_trace(tracer, tracer_event);
+ } else {
+ pr_debug("%s Got unrecognised type %d for parsing, exiting..\n",
+ __func__, tracer_event->type);
+ }
+ return 0;
+}
+
+static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer =
+ container_of(work, struct mlx5_fw_tracer, handle_traces_work);
+ u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK];
+ u32 block_count, start_offset, prev_start_offset, prev_consumer_index;
+ u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct tracer_event tracer_event;
+ int i;
+
+ mlx5_core_dbg(dev, "FWTracer: Handle Trace event, owner=(%d)\n", tracer->owner);
+ if (!tracer->owner)
+ return;
+
+ if (unlikely(!tracer->str_db.loaded))
+ goto arm;
+
+ block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
+ start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+ /* Copy the block to a local buffer so the HW cannot override it while it is being processed */
+ memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+ TRACER_BLOCK_SIZE_BYTE);
+
+ block_timestamp =
+ get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+
+ while (block_timestamp > tracer->last_timestamp) {
+ /* Check block override if it's not the first block */
+ if (tracer->last_timestamp) {
+ u64 *ts_event;
+ /* To avoid the block being overridden by the HW in case of buffer
+ * wraparound, the timestamp of the previous block
+ * should be compared to the last timestamp handled
+ * by the driver.
+ */
+ prev_consumer_index =
+ (tracer->buff.consumer_index - 1) & (block_count - 1);
+ prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+ ts_event = tracer->buff.log_buf + prev_start_offset +
+ (TRACES_PER_BLOCK - 1) * trace_event_size;
+ last_block_timestamp = get_block_timestamp(tracer, ts_event);
+ /* If the previous timestamp differs from the last stored
+ * timestamp, there is a good chance that the
+ * current buffer has been overwritten and therefore should
+ * not be parsed.
+ */
+ if (tracer->last_timestamp != last_block_timestamp) {
+ mlx5_core_warn(dev, "FWTracer: Events were lost\n");
+ tracer->last_timestamp = block_timestamp;
+ tracer->buff.consumer_index =
+ (tracer->buff.consumer_index + 1) & (block_count - 1);
+ break;
+ }
+ }
+
+ /* Parse events */
+ for (i = 0; i < TRACES_PER_BLOCK ; i++) {
+ poll_trace(tracer, &tracer_event, &tmp_trace_block[i]);
+ mlx5_tracer_handle_trace(tracer, &tracer_event);
+ }
+
+ tracer->buff.consumer_index =
+ (tracer->buff.consumer_index + 1) & (block_count - 1);
+
+ tracer->last_timestamp = block_timestamp;
+ start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+ memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+ TRACER_BLOCK_SIZE_BYTE);
+ block_timestamp = get_block_timestamp(tracer,
+ &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+ }
+
+arm:
+ mlx5_fw_tracer_arm(dev);
+}
+
+static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
+ MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
+ ilog2(TRACER_BUFFER_PAGE_NUM));
+ MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CONF, 0, 1);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+
+ tracer->buff.consumer_index = 0;
+ return err;
+}
+
+static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS);
+ MLX5_SET(mtrc_ctrl, in, trace_status, status);
+ MLX5_SET(mtrc_ctrl, in, arm_event, arm);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CTRL, 0, 1);
+
+ if (!err && status)
+ tracer->last_timestamp = 0;
+
+ return err;
+}
+
+static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ int err;
+
+ err = mlx5_fw_tracer_ownership_acquire(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+ /* Don't fail since ownership can be acquired on a later FW event */
+ return 0;
+ }
+
+ err = mlx5_fw_tracer_set_mtrc_conf(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err);
+ goto release_ownership;
+ }
+
+ /* enable tracer & trace events */
+ err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err);
+ goto release_ownership;
+ }
+
+ mlx5_core_dbg(dev, "FWTracer: Ownership granted and active\n");
+ return 0;
+
+release_ownership:
+ mlx5_fw_tracer_ownership_release(tracer);
+ return err;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer =
+ container_of(work, struct mlx5_fw_tracer, ownership_change_work);
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
+ if (tracer->owner) {
+ mlx5_fw_tracer_ownership_acquire(tracer);
+ return;
+ }
+
+ mlx5_fw_tracer_start(tracer);
+}
+
+static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev,
+ u32 *in, int size_in)
+{
+ u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) &&
+ !MLX5_CAP_DEBUG(dev, core_dump_qp))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out),
+ MLX5_REG_CORE_DUMP, 0, 1);
+}
+
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_tracer *tracer = dev->tracer;
+ u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+ int err;
+
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer)
+ return -EOPNOTSUPP;
+ if (!tracer->owner)
+ return -EPERM;
+
+ MLX5_SET(core_dump_reg, in, core_dump_type, 0x0);
+
+ err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in));
+ if (err)
+ return err;
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ flush_workqueue(tracer->work_queue);
+ return 0;
+}
+
+static int
+mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg,
+ struct mlx5_fw_trace_data *trace_data)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+ return 0;
+}
+
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_fw_trace_data *straces = tracer->st_arr.straces;
+ u32 index, start_index, end_index;
+ u32 saved_traces_index;
+ int err;
+
+ if (!straces[0].timestamp)
+ return -ENOMSG;
+
+ mutex_lock(&tracer->st_arr.lock);
+ saved_traces_index = tracer->st_arr.saved_traces_index;
+ if (straces[saved_traces_index].timestamp)
+ start_index = saved_traces_index;
+ else
+ start_index = 0;
+ end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1);
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces");
+ if (err)
+ goto unlock;
+ index = start_index;
+ while (index != end_index) {
+ err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]);
+ if (err)
+ goto unlock;
+
+ index = (index + 1) & (SAVED_TRACES_NUM - 1);
+ }
+
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+ mutex_unlock(&tracer->st_arr.lock);
+ return err;
+}
+
+/* Create software resources (buffers, etc.) */
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_tracer *tracer = NULL;
+ int err;
+
+ if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) {
+ mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n");
+ return NULL;
+ }
+
+ tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL);
+ if (!tracer)
+ return ERR_PTR(-ENOMEM);
+
+ tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer");
+ if (!tracer->work_queue) {
+ err = -ENOMEM;
+ goto free_tracer;
+ }
+
+ tracer->dev = dev;
+
+ INIT_LIST_HEAD(&tracer->ready_strings_list);
+ INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
+ INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db);
+ INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces);
+
+ err = mlx5_query_mtrc_caps(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+ goto destroy_workqueue;
+ }
+
+ err = mlx5_fw_tracer_create_log_buf(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Create log buffer failed %d\n", err);
+ goto destroy_workqueue;
+ }
+
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Allocate strings database failed %d\n", err);
+ goto free_log_buf;
+ }
+
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
+ mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
+
+ return tracer;
+
+free_log_buf:
+ mlx5_fw_tracer_destroy_log_buf(tracer);
+destroy_workqueue:
+ tracer->dev = NULL;
+ destroy_workqueue(tracer->work_queue);
+free_tracer:
+ kvfree(tracer);
+ return ERR_PTR(err);
+}
+
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ if (IS_ERR_OR_NULL(tracer))
+ return 0;
+
+ dev = tracer->dev;
+
+ if (!tracer->str_db.loaded)
+ queue_work(tracer->work_queue, &tracer->read_fw_strings_work);
+
+ err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err);
+ goto err_cancel_work;
+ }
+
+ err = mlx5_fw_tracer_create_mkey(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to create mkey %d\n", err);
+ goto err_dealloc_pd;
+ }
+
+ MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+ mlx5_eq_notifier_register(dev, &tracer->nb);
+
+ err = mlx5_fw_tracer_start(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err);
+ goto err_notifier_unregister;
+ }
+ return 0;
+
+err_notifier_unregister:
+ mlx5_eq_notifier_unregister(dev, &tracer->nb);
+ mlx5_core_destroy_mkey(dev, tracer->buff.mkey);
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
+err_cancel_work:
+ cancel_work_sync(&tracer->read_fw_strings_work);
+ return err;
+}
+
+/* Stop tracer + Cleanup HW resources */
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
+{
+ if (IS_ERR_OR_NULL(tracer))
+ return;
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
+ tracer->owner);
+ mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
+ cancel_work_sync(&tracer->ownership_change_work);
+ cancel_work_sync(&tracer->handle_traces_work);
+
+ if (tracer->owner)
+ mlx5_fw_tracer_ownership_release(tracer);
+
+ mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey);
+ mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
+}
+
+/* Free software resources (buffers, etc.) */
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
+{
+ if (IS_ERR_OR_NULL(tracer))
+ return;
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: Destroy\n");
+
+ cancel_work_sync(&tracer->read_fw_strings_work);
+ mlx5_fw_tracer_clean_ready_list(tracer);
+ mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
+ mlx5_fw_tracer_free_strings_db(tracer);
+ mlx5_fw_tracer_destroy_log_buf(tracer);
+ destroy_workqueue(tracer->work_queue);
+ kvfree(tracer);
+}
+
+static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ cancel_work_sync(&tracer->read_fw_strings_work);
+ mlx5_fw_tracer_clean_ready_list(tracer);
+ mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
+ mlx5_fw_tracer_free_strings_db(tracer);
+
+ dev = tracer->dev;
+ err = mlx5_query_mtrc_caps(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+ return err;
+ }
+
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err);
+ return err;
+ }
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
+
+ return 0;
+}
+
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ if (IS_ERR_OR_NULL(tracer))
+ return 0;
+
+ dev = tracer->dev;
+ mlx5_fw_tracer_cleanup(tracer);
+ err = mlx5_fw_tracer_recreate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n");
+ return err;
+ }
+ err = mlx5_fw_tracer_init(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n");
+ return err;
+ }
+
+ return 0;
+}
+
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct mlx5_eqe *eqe = data;
+
+ switch (eqe->sub_type) {
+ case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
+ queue_work(tracer->work_queue, &tracer->ownership_change_work);
+ break;
+ case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ break;
+ default:
+ mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
+ eqe->sub_type);
+ }
+
+ return NOTIFY_OK;
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
new file mode 100644
index 000000000..4762b55b0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_TRACER_H__
+#define __LIB_TRACER_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define STRINGS_DB_SECTIONS_NUM 8
+#define STRINGS_DB_READ_SIZE_BYTES 256
+#define STRINGS_DB_LEFTOVER_SIZE_BYTES 64
+#define TRACER_BUFFER_PAGE_NUM 64
+#define TRACER_BUFFER_CHUNK 4096
+#define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK)
+
+#define TRACER_BLOCK_SIZE_BYTE 256
+#define TRACES_PER_BLOCK 32
+
+#define TRACE_STR_MSG 256
+#define SAVED_TRACES_NUM 8192
+
+#define TRACER_MAX_PARAMS 7
+#define MESSAGE_HASH_BITS 6
+#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
+
+#define MASK_52_7 (0x1FFFFFFFFFFF80)
+#define MASK_6_0 (0x7F)
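+
+/* Illustrative sketch (an assumption based on the structures below, not a
+ * quote of the parsing code): a timestamp event carries the wide timestamp,
+ * while each string event carries only a 7-bit timestamp, so a full message
+ * timestamp would be assembled roughly as:
+ *
+ *    u64 msg_ts = (timestamp_event_ts & MASK_52_7) | (string_ts & MASK_6_0);
+ */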
+
+struct mlx5_fw_trace_data {
+ u64 timestamp;
+ bool lost;
+ u8 event_id;
+ char msg[TRACE_STR_MSG];
+};
+
+struct mlx5_fw_tracer {
+ struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
+ bool owner;
+ u8 trc_ver;
+ struct workqueue_struct *work_queue;
+ struct work_struct ownership_change_work;
+ struct work_struct read_fw_strings_work;
+
+ /* Strings DB */
+ struct {
+ u8 first_string_trace;
+ u8 num_string_trace;
+ u32 num_string_db;
+ u32 base_address_out[STRINGS_DB_SECTIONS_NUM];
+ u32 size_out[STRINGS_DB_SECTIONS_NUM];
+ void *buffer[STRINGS_DB_SECTIONS_NUM];
+ bool loaded;
+ } str_db;
+
+ /* Log Buffer */
+ struct {
+ u32 pdn;
+ void *log_buf;
+ dma_addr_t dma;
+ u32 size;
+ u32 mkey;
+ u32 consumer_index;
+ } buff;
+
+ /* Saved Traces Array */
+ struct {
+ struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM];
+ u32 saved_traces_index;
+ struct mutex lock; /* Protect st_arr access */
+ } st_arr;
+
+ u64 last_timestamp;
+ struct work_struct handle_traces_work;
+ struct hlist_head hash[MESSAGE_HASH_SIZE];
+ struct list_head ready_strings_list;
+};
+
+struct tracer_string_format {
+ char *string;
+ int params[TRACER_MAX_PARAMS];
+ int num_of_params;
+ int last_param_num;
+ u8 event_id;
+ u32 tmsn;
+ struct hlist_node hlist;
+ struct list_head list;
+ u32 timestamp;
+ bool lost;
+};
+
+enum mlx5_fw_tracer_ownership_state {
+ MLX5_FW_TRACER_RELEASE_OWNERSHIP,
+ MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
+};
+
+enum tracer_ctrl_fields_select {
+ TRACE_STATUS = 1 << 0,
+};
+
+enum tracer_event_type {
+ TRACER_EVENT_TYPE_STRING,
+ TRACER_EVENT_TYPE_TIMESTAMP = 0xFF,
+ TRACER_EVENT_TYPE_UNRECOGNIZED,
+};
+
+enum tracing_mode {
+ TRACE_TO_MEMORY = 1 << 0,
+};
+
+struct tracer_timestamp_event {
+ u64 timestamp;
+ u8 unreliable;
+};
+
+struct tracer_string_event {
+ u32 timestamp;
+ u32 tmsn;
+ u32 tdsn;
+ u32 string_param;
+};
+
+struct tracer_event {
+ bool lost_event;
+ u32 type;
+ u8 event_id;
+ union {
+ struct tracer_string_event string_event;
+ struct tracer_timestamp_event timestamp_event;
+ };
+};
+
+struct mlx5_ifc_tracer_event_bits {
+ u8 lost[0x1];
+ u8 timestamp[0x7];
+ u8 event_id[0x8];
+ u8 event_data[0x30];
+};
+
+struct mlx5_ifc_tracer_string_event_bits {
+ u8 lost[0x1];
+ u8 timestamp[0x7];
+ u8 event_id[0x8];
+ u8 tmsn[0xd];
+ u8 tdsn[0x3];
+ u8 string_param[0x20];
+};
+
+struct mlx5_ifc_tracer_timestamp_event_bits {
+ u8 timestamp7_0[0x8];
+ u8 event_id[0x8];
+ u8 urts[0x3];
+ u8 timestamp52_40[0xd];
+ u8 timestamp39_8[0x20];
+};
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+ struct devlink_fmsg *fmsg);
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
new file mode 100644
index 000000000..3038be575
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(__LIB_TRACER_TRACEPOINT_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __LIB_TRACER_TRACEPOINT_H__
+
+#include <linux/tracepoint.h>
+#include "fw_tracer.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+/* Tracepoint for FWTracer messages: */
+TRACE_EVENT(mlx5_fw,
+ TP_PROTO(const struct mlx5_fw_tracer *tracer, u64 trace_timestamp,
+ bool lost, u8 event_id, const char *msg),
+
+ TP_ARGS(tracer, trace_timestamp, lost, event_id, msg),
+
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(tracer->dev->device))
+ __field(u64, trace_timestamp)
+ __field(bool, lost)
+ __field(u8, event_id)
+ __string(msg, msg)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name,
+ dev_name(tracer->dev->device));
+ __entry->trace_timestamp = trace_timestamp;
+ __entry->lost = lost;
+ __entry->event_id = event_id;
+ __assign_str(msg, msg);
+ ),
+
+ TP_printk("%s [0x%llx] %d [0x%x] %s",
+ __get_str(dev_name),
+ __entry->trace_timestamp,
+ __entry->lost, __entry->event_id,
+ __get_str(msg))
+);
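+
+/* Usage sketch (illustrative): this TRACE_EVENT() definition generates a
+ * trace_mlx5_fw() helper that the tracer calls when it emits a decoded
+ * firmware message, roughly:
+ *
+ *    trace_mlx5_fw(tracer, trace_timestamp, lost, event_id, msg);
+ *
+ * The events can then be enabled and read via tracefs, typically under
+ * events/mlx5/mlx5_fw.
+ */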
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ./diag
+#define TRACE_INCLUDE_FILE fw_tracer_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
new file mode 100644
index 000000000..c5b560a8b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rsc_dump.h"
+#include "lib/mlx5.h"
+
+#define MLX5_SGMT_TYPE(SGMT) MLX5_SGMT_TYPE_##SGMT
+#define MLX5_SGMT_STR_ASSING(SGMT)[MLX5_SGMT_TYPE(SGMT)] = #SGMT
+static const char *const mlx5_rsc_sgmt_name[] = {
+ MLX5_SGMT_STR_ASSING(HW_CQPC),
+ MLX5_SGMT_STR_ASSING(HW_SQPC),
+ MLX5_SGMT_STR_ASSING(HW_RQPC),
+ MLX5_SGMT_STR_ASSING(FULL_SRQC),
+ MLX5_SGMT_STR_ASSING(FULL_CQC),
+ MLX5_SGMT_STR_ASSING(FULL_EQC),
+ MLX5_SGMT_STR_ASSING(FULL_QPC),
+ MLX5_SGMT_STR_ASSING(SND_BUFF),
+ MLX5_SGMT_STR_ASSING(RCV_BUFF),
+ MLX5_SGMT_STR_ASSING(SRQ_BUFF),
+ MLX5_SGMT_STR_ASSING(CQ_BUFF),
+ MLX5_SGMT_STR_ASSING(EQ_BUFF),
+ MLX5_SGMT_STR_ASSING(SX_SLICE),
+ MLX5_SGMT_STR_ASSING(SX_SLICE_ALL),
+ MLX5_SGMT_STR_ASSING(RDB),
+ MLX5_SGMT_STR_ASSING(RX_SLICE_ALL),
+ MLX5_SGMT_STR_ASSING(PRM_QUERY_QP),
+ MLX5_SGMT_STR_ASSING(PRM_QUERY_CQ),
+ MLX5_SGMT_STR_ASSING(PRM_QUERY_MKEY),
+};
+
+struct mlx5_rsc_dump {
+ u32 pdn;
+ u32 mkey;
+ u32 number_of_menu_items;
+ u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
+};
+
+struct mlx5_rsc_dump_cmd {
+ u64 mem_size;
+ u8 cmd[MLX5_ST_SZ_BYTES(resource_dump)];
+};
+
+static int mlx5_rsc_dump_sgmt_get_by_name(char *name)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5_rsc_sgmt_name); i++)
+ if (!strcmp(name, mlx5_rsc_sgmt_name[i]))
+ return i;
+
+ return -EINVAL;
+}
+
+#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_menu_segment))
+
+static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page,
+ int read_size, int start_idx)
+{
+ void *data = page_address(page);
+ enum mlx5_sgmt_type sgmt_idx;
+ int num_of_items;
+ char *sgmt_name;
+ void *member;
+ int size = 0;
+ void *menu;
+ int i;
+
+ if (!start_idx) {
+ menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
+ rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu,
+ num_of_records);
+ size = MLX5_RSC_DUMP_MENU_HEADER_SIZE;
+ data += size;
+ }
+ num_of_items = rsc_dump->number_of_menu_items;
+
+ for (i = 0; start_idx + i < num_of_items; i++) {
+ size += MLX5_ST_SZ_BYTES(resource_dump_menu_record);
+ if (size >= read_size)
+ return start_idx + i;
+
+ member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i;
+ sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name);
+ sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name);
+ if (sgmt_idx == -EINVAL)
+ continue;
+ rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record,
+ member, segment_type);
+ }
+ return 0;
+}
+
+static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+ struct page *page)
+{
+ struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump;
+ struct device *ddev = mlx5_core_dma_dev(dev);
+ u32 out_seq_num;
+ u32 in_seq_num;
+ dma_addr_t dma;
+ int err;
+
+ dma = dma_map_page(ddev, page, 0, cmd->mem_size, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(ddev, dma)))
+ return -ENOMEM;
+
+ in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num);
+ MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey);
+ MLX5_SET64(resource_dump, cmd->cmd, address, dma);
+
+ err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd,
+ sizeof(cmd->cmd), MLX5_REG_RESOURCE_DUMP, 0, 1);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to access err %d\n", err);
+ goto out;
+ }
+ out_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num);
+ if (out_seq_num && (in_seq_num + 1 != out_seq_num))
+ err = -EIO;
+out:
+ dma_unmap_page(ddev, dma, cmd->mem_size, DMA_FROM_DEVICE);
+ return err;
+}
+
+struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev,
+ struct mlx5_rsc_key *key)
+{
+ struct mlx5_rsc_dump_cmd *cmd;
+ int sgmt_type;
+
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ sgmt_type = dev->rsc_dump->fw_segment_type[key->rsc];
+ if (!sgmt_type && key->rsc != MLX5_SGMT_TYPE_MENU)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd) {
+ mlx5_core_err(dev, "Resource dump: Failed to allocate command\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ MLX5_SET(resource_dump, cmd->cmd, segment_type, sgmt_type);
+ MLX5_SET(resource_dump, cmd->cmd, index1, key->index1);
+ MLX5_SET(resource_dump, cmd->cmd, index2, key->index2);
+ MLX5_SET(resource_dump, cmd->cmd, num_of_obj1, key->num_of_obj1);
+ MLX5_SET(resource_dump, cmd->cmd, num_of_obj2, key->num_of_obj2);
+ MLX5_SET(resource_dump, cmd->cmd, size, key->size);
+ cmd->mem_size = key->size;
+ return cmd;
+}
+EXPORT_SYMBOL(mlx5_rsc_dump_cmd_create);
+
+void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd)
+{
+ kfree(cmd);
+}
+EXPORT_SYMBOL(mlx5_rsc_dump_cmd_destroy);
+
+int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+ struct page *page, int *size)
+{
+ bool more_dump;
+ int err;
+
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return -EOPNOTSUPP;
+
+ err = mlx5_rsc_dump_trigger(dev, cmd, page);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to trigger dump, %d\n", err);
+ return err;
+ }
+ *size = MLX5_GET(resource_dump, cmd->cmd, size);
+ more_dump = MLX5_GET(resource_dump, cmd->cmd, more_dump);
+
+ return more_dump;
+}
+EXPORT_SYMBOL(mlx5_rsc_dump_next);
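+
+/* Typical caller loop (illustrative sketch, mirroring mlx5_rsc_dump_menu()
+ * below): a positive return value from mlx5_rsc_dump_next() means another
+ * chunk is pending, 0 means the dump is complete, negative is an error:
+ *
+ *    cmd = mlx5_rsc_dump_cmd_create(dev, &key);
+ *    do {
+ *        err = mlx5_rsc_dump_next(dev, cmd, page, &size);
+ *        ... consume "size" bytes from "page" ...
+ *    } while (err > 0);
+ *    mlx5_rsc_dump_cmd_destroy(cmd);
+ */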
+
+#define MLX5_RSC_DUMP_MENU_SEGMENT 0xffff
+static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rsc_dump_cmd *cmd = NULL;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
+ int start_idx = 0;
+ int size;
+ int err;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ key.rsc = MLX5_SGMT_TYPE_MENU;
+ key.size = PAGE_SIZE;
+ cmd = mlx5_rsc_dump_cmd_create(dev, &key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+ MLX5_SET(resource_dump, cmd->cmd, segment_type, MLX5_RSC_DUMP_MENU_SEGMENT);
+
+ do {
+ err = mlx5_rsc_dump_next(dev, cmd, page, &size);
+ if (err < 0)
+ goto destroy_cmd;
+
+ start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx);
+
+ } while (err > 0);
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+free_page:
+ __free_page(page);
+
+ return err;
+}
+
+static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ u32 *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rsc_dump *rsc_dump;
+
+ if (!MLX5_CAP_DEBUG(dev, resource_dump)) {
+ mlx5_core_dbg(dev, "Resource dump: capability not present\n");
+ return NULL;
+ }
+ rsc_dump = kzalloc(sizeof(*rsc_dump), GFP_KERNEL);
+ if (!rsc_dump)
+ return ERR_PTR(-ENOMEM);
+
+ return rsc_dump;
+}
+
+void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev)
+{
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return;
+ kfree(dev->rsc_dump);
+}
+
+int mlx5_rsc_dump_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump;
+ int err;
+
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return 0;
+
+ err = mlx5_core_alloc_pd(dev, &rsc_dump->pdn);
+ if (err) {
+ mlx5_core_warn(dev, "Resource dump: Failed to allocate PD %d\n", err);
+ return err;
+ }
+ err = mlx5_rsc_dump_create_mkey(dev, rsc_dump->pdn, &rsc_dump->mkey);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to create mkey, %d\n", err);
+ goto free_pd;
+ }
+ err = mlx5_rsc_dump_menu(dev);
+ if (err) {
+ mlx5_core_err(dev, "Resource dump: Failed to read menu, %d\n", err);
+ goto destroy_mkey;
+ }
+ return err;
+
+destroy_mkey:
+ mlx5_core_destroy_mkey(dev, rsc_dump->mkey);
+free_pd:
+ mlx5_core_dealloc_pd(dev, rsc_dump->pdn);
+ return err;
+}
+
+void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev)
+{
+ if (IS_ERR_OR_NULL(dev->rsc_dump))
+ return;
+
+ mlx5_core_destroy_mkey(dev, dev->rsc_dump->mkey);
+ mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h
new file mode 100644
index 000000000..64c4956db
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_RSC_DUMP_H
+#define __MLX5_RSC_DUMP_H
+
+#include <linux/mlx5/rsc_dump.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define MLX5_RSC_DUMP_ALL 0xFFFF
+struct mlx5_rsc_dump_cmd;
+struct mlx5_rsc_dump;
+
+struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev);
+void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_rsc_dump_init(struct mlx5_core_dev *dev);
+void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev);
+
+struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev,
+ struct mlx5_rsc_key *key);
+void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd);
+
+int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+ struct page *page, int *size);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
new file mode 100644
index 000000000..d000236dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "ecpf.h"
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
+{
+ return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
+}
+
+static bool mlx5_ecpf_esw_admins_host_pf(const struct mlx5_core_dev *dev)
+{
+ /* In separate host mode, the PF enables itself.
+ * When the ECPF is the eswitch manager, the eswitch enables the
+ * host PF after the eswitch is set up.
+ */
+ return mlx5_core_is_ecpf_esw_manager(dev);
+}
+
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
+
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, 0);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+}
+
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
+
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, 0);
+ MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_host_pf_init(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ if (mlx5_ecpf_esw_admins_host_pf(dev))
+ return 0;
+
+ /* The ECPF shall enable the HCA for the host PF in the same way
+ * a PF does this for its VFs when the ECPF is not an eswitch manager.
+ */
+ err = mlx5_cmd_host_pf_enable_hca(dev);
+ if (err)
+ mlx5_core_err(dev, "Failed to enable external host PF HCA err(%d)\n", err);
+
+ return err;
+}
+
+static void mlx5_host_pf_cleanup(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ if (mlx5_ecpf_esw_admins_host_pf(dev))
+ return;
+
+ err = mlx5_cmd_host_pf_disable_hca(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to disable external host PF HCA err(%d)\n", err);
+ return;
+ }
+}
+
+int mlx5_ec_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_ecpf(dev))
+ return 0;
+
+ return mlx5_host_pf_init(dev);
+}
+
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ if (!mlx5_core_is_ecpf(dev))
+ return;
+
+ mlx5_host_pf_cleanup(dev);
+
+ err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]);
+ if (err)
+ mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err);
+
+ err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF]);
+ if (err)
+ mlx5_core_warn(dev, "Timeout reclaiming external host VFs pages err(%d)\n", err);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
new file mode 100644
index 000000000..40b6ad76d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_ECPF_H__
+#define __MLX5_ECPF_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+ MLX5_ECPU_BIT_NUM = 23,
+};
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
+int mlx5_ec_init(struct mlx5_core_dev *dev);
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline bool
+mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
+static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ECPF_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
new file mode 100644
index 000000000..0ee456480
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -0,0 +1,1246 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_H__
+#define __MLX5_EN_H__
+
+#include <linux/if_vlan.h>
+#include <linux/etherdevice.h>
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/crash_dump.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rhashtable.h>
+#include <net/udp_tunnel.h>
+#include <net/switchdev.h>
+#include <net/xdp.h>
+#include <linux/dim.h>
+#include <linux/bits.h>
+#include "wq.h"
+#include "mlx5_core.h"
+#include "en_stats.h"
+#include "en/dcbnl.h"
+#include "en/fs.h"
+#include "en/qos.h"
+#include "lib/hv_vhca.h"
+#include "lib/clock.h"
+#include "en/rx_res.h"
+#include "en/selq.h"
+
+extern const struct net_device_ops mlx5e_netdev_ops;
+struct page_pool;
+
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
+
+#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+
+#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
+#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
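+
+/* Worked example (illustrative): with params->hard_mtu == MLX5E_ETH_HARD_MTU,
+ * i.e. 14 (ETH_HLEN) + 4 (VLAN_HLEN) + 4 (ETH_FCS_LEN) = 22 bytes, a software
+ * MTU of 1500 maps to MLX5E_SW2HW_MTU(params, 1500) == 1522 and back via
+ * MLX5E_HW2SW_MTU(params, 1522) == 1500. Profiles may use a different
+ * hard_mtu, so the 22-byte figure is only an example.
+ */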
+
+#define MLX5E_MAX_NUM_TC 8
+#define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE
+
+#define MLX5_RX_HEADROOM NET_SKB_PAD
+#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+#define MLX5E_RX_MAX_HEAD (256)
+#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
+#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
+#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
+#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024)
+#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096)
+
+#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
+ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
+#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
+ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
+#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
+ MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
+
+#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18
+
+/* Keep in sync with mlx5e_mpwrq_log_wqe_sz.
+ * These are theoretical maximums, which can be further restricted by
+ * capabilities. These values are used for static resource allocations and
+ * sanity checks.
+ * MLX5_SEND_WQE_MAX_SIZE is a bit bigger than the maximum cacheline-aligned WQE
+ * size actually used at runtime, but it's not a problem when calculating static
+ * array sizes.
+ */
+#define MLX5_UMR_MAX_MTT_SPACE \
+ (ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \
+ MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5_MPWRQ_MAX_PAGES_PER_WQE \
+ rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt))
+
+#define MLX5E_MAX_RQ_NUM_MTTS \
+ (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */
+#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */
+#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
+
+#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
+#define MLX5E_LOG_MAX_RX_WQE_BULK \
+ (ilog2(PAGE_SIZE / roundup_pow_of_two(MLX5E_MIN_SKB_FRAG_SZ)))
+
+#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6
+#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK)
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
+
+#define MLX5E_DEFAULT_LRO_TIMEOUT 32
+#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
+
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2
+
+#define MLX5E_MIN_NUM_CHANNELS 0x1
+#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
+#define MLX5E_TX_CQ_POLL_BUDGET 128
+#define MLX5E_TX_XSK_POLL_BUDGET 64
+#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
+
+#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
+ (sizeof(struct mlx5e_umr_wqe) +\
+ (sizeof(struct mlx5_klm) * (sgl_len)))
+
+#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
+ (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
+ (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
+
+#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
+ (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
+
+#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
+ ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
+
+#define MLX5E_MAX_KLM_PER_WQE(mdev) \
+ MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
+
+#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
+
+#define mlx5e_dbg(mlevel, priv, format, ...) \
+do { \
+ if (NETIF_MSG_##mlevel & (priv)->msglevel) \
+ netdev_warn(priv->netdev, format, \
+ ##__VA_ARGS__); \
+} while (0)
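+
+/* Usage sketch (illustrative): the first argument names a NETIF_MSG_* class
+ * that is checked against priv->msglevel, e.g.
+ *
+ *    mlx5e_dbg(HW, priv, "MTU changed to %d\n", new_mtu);
+ *
+ * prints only when NETIF_MSG_HW is set in priv->msglevel ("new_mtu" is just a
+ * placeholder variable here).
+ */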
+
+#define mlx5e_state_dereference(priv, p) \
+ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
+
+static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
+{
+ if (mlx5_lag_is_lacp_owner(mdev))
+ return 1;
+
+ return clamp_t(u8, MLX5_CAP_GEN(mdev, num_lag_ports), 1, MLX5_MAX_PORTS);
+}
+
+static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
+{
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW,
+ wq_size / 2);
+ default:
+ return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES,
+ wq_size / 2);
+ }
+}
+
+/* Use this function to get the max number of channels (RX/TX queues) only when creating the netdev */
+static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
+{
+ return is_kdump_kernel() ?
+ MLX5E_MIN_NUM_CHANNELS :
+ min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
+}
+
+/* The maximum WQE size is reported by the max_wqe_sz_sq capability in
+ * units of bytes. The driver caps the limit at 1KB (16 WQEBBs), unless
+ * the firmware capability is stricter.
+ */
+static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev)
+{
+ BUILD_BUG_ON(MLX5_SEND_WQE_MAX_WQEBBS > U8_MAX);
+
+ return (u8)min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS,
+ MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
+}
+
+static inline u8 mlx5e_get_max_sq_aligned_wqebbs(struct mlx5_core_dev *mdev)
+{
+/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS.
+ * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16,
+ * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64)
+ * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower
+ * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be
+ * cache-aligned.
+ */
+ u8 wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+
+ wqebbs = min_t(u8, wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
+#if L1_CACHE_BYTES >= 128
+ wqebbs = ALIGN_DOWN(wqebbs, 2);
+#endif
+ return wqebbs;
+}
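+
+/* Worked numbers (illustrative, assuming MLX5_SEND_WQE_BB == 64 bytes and
+ * MLX5_SEND_WQE_MAX_WQEBBS == 16): a device reporting max_wqe_sz_sq == 1024
+ * gives mlx5e_get_max_sq_wqebbs() == min(16, 1024 / 64) == 16. The aligned
+ * variant then lowers this to 15 to keep the DS count within 6 bits, and on
+ * 128-byte cacheline systems rounds down to 14.
+ */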
+
+struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[];
+};
+
+struct mlx5e_rx_wqe_ll {
+ struct mlx5_wqe_srq_next_seg next;
+ struct mlx5_wqe_data_seg data[];
+};
+
+struct mlx5e_rx_wqe_cyc {
+ struct mlx5_wqe_data_seg data[0];
+};
+
+struct mlx5e_umr_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_umr_ctrl_seg uctrl;
+ struct mlx5_mkey_seg mkc;
+ union {
+ DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
+ DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
+ DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms);
+ };
+};
+
+enum mlx5e_priv_flag {
+ MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ MLX5E_PFLAG_RX_CQE_COMPRESS,
+ MLX5E_PFLAG_RX_STRIDING_RQ,
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+ MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5E_PFLAG_SKB_TX_MPWQE,
+ MLX5E_PFLAG_TX_PORT_TS,
+ MLX5E_NUM_PFLAGS, /* Keep last */
+};
+
+#define MLX5E_SET_PFLAG(params, pflag, enable) \
+ do { \
+ if (enable) \
+ (params)->pflags |= BIT(pflag); \
+ else \
+ (params)->pflags &= ~(BIT(pflag)); \
+ } while (0)
+
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
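+
+/* Usage sketch (illustrative): priv flags are plain bits in params->pflags,
+ * e.g.
+ *
+ *    MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, true);
+ *    if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ *        ... CQE compression is enabled ...
+ */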
+
+enum packet_merge {
+ MLX5E_PACKET_MERGE_NONE,
+ MLX5E_PACKET_MERGE_LRO,
+ MLX5E_PACKET_MERGE_SHAMPO,
+};
+
+struct mlx5e_packet_merge_param {
+ enum packet_merge type;
+ u32 timeout;
+ struct {
+ u8 match_criteria_type;
+ u8 alignment_granularity;
+ } shampo;
+};
+
+struct mlx5e_params {
+ u8 log_sq_size;
+ u8 rq_wq_type;
+ u8 log_rq_mtu_frames;
+ u16 num_channels;
+ struct {
+ u16 mode;
+ u8 num_tc;
+ struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+ struct {
+ u64 max_rate[TC_MAX_QUEUE];
+ u32 hw_id[TC_MAX_QUEUE];
+ } channel;
+ } mqprio;
+ bool rx_cqe_compress_def;
+ struct dim_cq_moder rx_cq_moderation;
+ struct dim_cq_moder tx_cq_moderation;
+ struct mlx5e_packet_merge_param packet_merge;
+ u8 tx_min_inline_mode;
+ bool vlan_strip_disable;
+ bool scatter_fcs_en;
+ bool rx_dim_enabled;
+ bool tx_dim_enabled;
+ u32 pflags;
+ struct bpf_prog *xdp_prog;
+ struct mlx5e_xsk *xsk;
+ unsigned int sw_mtu;
+ int hard_mtu;
+ bool ptp_rx;
+};
+
+static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
+{
+ return params->mqprio.mode == TC_MQPRIO_MODE_DCB ?
+ params->mqprio.num_tc : 1;
+}
+
+enum {
+ MLX5E_RQ_STATE_ENABLED,
+ MLX5E_RQ_STATE_RECOVERING,
+ MLX5E_RQ_STATE_AM,
+ MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
+ MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
+ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
+ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
+};
+
+struct mlx5e_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per napi poll */
+ u16 event_ctr;
+ struct napi_struct *napi;
+ struct mlx5_core_cq mcq;
+ struct mlx5e_ch_stats *ch_stats;
+
+ /* control */
+ struct net_device *netdev;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+ struct mlx5_wq_ctrl wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_cq_decomp {
+ /* cqe decompression */
+ struct mlx5_cqe64 title;
+ struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
+ u8 mini_arr_idx;
+ u16 left;
+ u16 wqe_counter;
+} ____cacheline_aligned_in_smp;
+
+enum mlx5e_dma_map_type {
+ MLX5E_DMA_MAP_SINGLE,
+ MLX5E_DMA_MAP_PAGE
+};
+
+struct mlx5e_sq_dma {
+ dma_addr_t addr;
+ u32 size;
+ enum mlx5e_dma_map_type type;
+};
+
+enum {
+ MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_MPWQE,
+ MLX5E_SQ_STATE_RECOVERING,
+ MLX5E_SQ_STATE_IPSEC,
+ MLX5E_SQ_STATE_AM,
+ MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
+ MLX5E_SQ_STATE_PENDING_XSK_TX,
+ MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
+ MLX5E_SQ_STATE_XDP_MULTIBUF,
+};
+
+struct mlx5e_tx_mpwqe {
+ /* Current MPWQE session */
+ struct mlx5e_tx_wqe *wqe;
+ u32 bytes_count;
+ u8 ds_count;
+ u8 pkt_count;
+ u8 inline_on;
+};
+
+struct mlx5e_skb_fifo {
+ struct sk_buff **fifo;
+ u16 *pc;
+ u16 *cc;
+ u16 mask;
+};
+
+struct mlx5e_ptpsq;
+
+struct mlx5e_txqsq {
+ /* data path */
+
+ /* dirtied @completion */
+ u16 cc;
+ u16 skb_fifo_cc;
+ u32 dma_fifo_cc;
+ struct dim dim; /* Adaptive Moderation */
+
+ /* dirtied @xmit */
+ u16 pc ____cacheline_aligned_in_smp;
+ u16 skb_fifo_pc;
+ u32 dma_fifo_pc;
+ struct mlx5e_tx_mpwqe mpwqe;
+
+ struct mlx5e_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ u32 dma_fifo_mask;
+ struct mlx5e_sq_stats *stats;
+ struct {
+ struct mlx5e_sq_dma *dma_fifo;
+ struct mlx5e_skb_fifo skb_fifo;
+ struct mlx5e_tx_wqe_info *wqe_info;
+ } db;
+ void __iomem *uar_map;
+ struct netdev_queue *txq;
+ u32 sqn;
+ u16 stop_room;
+ u8 max_sq_mpw_wqebbs;
+ u8 min_inline_mode;
+ struct device *pdev;
+ __be32 mkey_be;
+ unsigned long state;
+ unsigned int hw_mtu;
+ struct mlx5_clock *clock;
+ struct net_device *netdev;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ int ch_ix;
+ int txq_ix;
+ u32 rate_limit;
+ struct work_struct recover_work;
+ struct mlx5e_ptpsq *ptpsq;
+ cqe_ts_to_ns ptp_cyc2time;
+} ____cacheline_aligned_in_smp;
+
+union mlx5e_alloc_unit {
+ struct page *page;
+ struct xdp_buff *xsk;
+};
+
+/* XDP packets can be transmitted in different ways. On completion, we need to
+ * distinguish between them to clean things up properly.
+ */
+enum mlx5e_xdp_xmit_mode {
+ /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
+ * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
+ * returned.
+ */
+ MLX5E_XDP_XMIT_MODE_FRAME,
+
+ /* The xdp_frame was created in place as a result of XDP_TX from a
+ * regular RQ. No DMA remapping happened, and the page belongs to us.
+ */
+ MLX5E_XDP_XMIT_MODE_PAGE,
+
+ /* No xdp_frame was created at all, the transmit happened from a UMEM
+ * page. The UMEM Completion Ring producer pointer has to be increased.
+ */
+ MLX5E_XDP_XMIT_MODE_XSK,
+};
+
+struct mlx5e_xdp_info {
+ enum mlx5e_xdp_xmit_mode mode;
+ union {
+ struct {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ } frame;
+ struct {
+ struct mlx5e_rq *rq;
+ struct page *page;
+ } page;
+ };
+};
+
+struct mlx5e_xmit_data {
+ dma_addr_t dma_addr;
+ void *data;
+ u32 len;
+};
+
+struct mlx5e_xdp_info_fifo {
+ struct mlx5e_xdp_info *xi;
+ u32 *cc;
+ u32 *pc;
+ u32 mask;
+};
+
+struct mlx5e_xdpsq;
+typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
+ struct mlx5e_xmit_data *,
+ struct skb_shared_info *,
+ int);
+
+struct mlx5e_xdpsq {
+ /* data path */
+
+ /* dirtied @completion */
+ u32 xdpi_fifo_cc;
+ u16 cc;
+
+ /* dirtied @xmit */
+ u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
+ u16 pc;
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5e_tx_mpwqe mpwqe;
+
+ struct mlx5e_cq cq;
+
+ /* read only */
+ struct xsk_buff_pool *xsk_pool;
+ struct mlx5_wq_cyc wq;
+ struct mlx5e_xdpsq_stats *stats;
+ mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
+ mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
+ struct {
+ struct mlx5e_xdp_wqe_info *wqe_info;
+ struct mlx5e_xdp_info_fifo xdpi_fifo;
+ } db;
+ void __iomem *uar_map;
+ u32 sqn;
+ struct device *pdev;
+ __be32 mkey_be;
+ u16 stop_room;
+ u8 max_sq_mpw_wqebbs;
+ u8 min_inline_mode;
+ unsigned long state;
+ unsigned int hw_mtu;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_ktls_resync_resp;
+
+struct mlx5e_icosq {
+ /* data path */
+ u16 cc;
+ u16 pc;
+
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5e_cq cq;
+
+ /* write@xmit, read@completion */
+ struct {
+ struct mlx5e_icosq_wqe_info *wqe_info;
+ } db;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ void __iomem *uar_map;
+ u32 sqn;
+ u16 reserved_room;
+ unsigned long state;
+ struct mlx5e_ktls_resync_resp *ktls_resync;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+
+ struct work_struct recover_work;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_wqe_frag_info {
+ union mlx5e_alloc_unit *au;
+ u32 offset;
+ bool last_in_page;
+};
+
+struct mlx5e_mpw_info {
+ u16 consumed_strides;
+ DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
+ union mlx5e_alloc_unit alloc_units[];
+};
+
+#define MLX5E_MAX_RX_FRAGS 4
+
+/* A single cache unit is capable of serving one NAPI call (for a non-striding
+ * RQ) or one MPWQE (for a striding RQ).
+ */
+#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
+ MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
+#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
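+
+/* Sizing example (illustrative, assuming NAPI_POLL_WEIGHT == 64 and
+ * MLX5_MPWRQ_MAX_PAGES_PER_WQE <= 64): MLX5E_CACHE_UNIT evaluates to 64 and
+ * MLX5E_CACHE_SIZE to 4 * 64 = 256 page slots per RQ.
+ */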
+struct mlx5e_page_cache {
+ u32 head;
+ u32 tail;
+ struct page *page_cache[MLX5E_CACHE_SIZE];
+};
+
+struct mlx5e_rq;
+typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
+typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
+typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool);
+
+int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk);
+void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params);
+
+enum mlx5e_rq_flag {
+ MLX5E_RQ_FLAG_XDP_XMIT,
+ MLX5E_RQ_FLAG_XDP_REDIRECT,
+};
+
+struct mlx5e_rq_frag_info {
+ int frag_size;
+ int frag_stride;
+};
+
+struct mlx5e_rq_frags_info {
+ struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS];
+ u8 num_frags;
+ u8 log_num_frags;
+ u8 wqe_bulk;
+ u8 wqe_index_mask;
+};
+
+struct mlx5e_dma_info {
+ dma_addr_t addr;
+ struct page *page;
+};
+
+struct mlx5e_shampo_hd {
+ u32 mkey;
+ struct mlx5e_dma_info *info;
+ struct page *last_page;
+ u16 hd_per_wq;
+ u16 hd_per_wqe;
+ unsigned long *bitmap;
+ u16 pi;
+ u16 ci;
+ __be32 key;
+ u64 last_addr;
+};
+
+struct mlx5e_hw_gro_data {
+ struct sk_buff *skb;
+ struct flow_keys fk;
+ int second_ip_id;
+};
+
+enum mlx5e_mpwrq_umr_mode {
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED,
+ MLX5E_MPWRQ_UMR_MODE_UNALIGNED,
+ MLX5E_MPWRQ_UMR_MODE_OVERSIZED,
+ MLX5E_MPWRQ_UMR_MODE_TRIPLE,
+};
+
+struct mlx5e_rq {
+ /* data path */
+ union {
+ struct {
+ struct mlx5_wq_cyc wq;
+ struct mlx5e_wqe_frag_info *frags;
+ union mlx5e_alloc_unit *alloc_units;
+ struct mlx5e_rq_frags_info info;
+ mlx5e_fp_skb_from_cqe skb_from_cqe;
+ } wqe;
+ struct {
+ struct mlx5_wq_ll wq;
+ struct mlx5e_umr_wqe umr_wqe;
+ struct mlx5e_mpw_info *info;
+ mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
+ __be32 umr_mkey_be;
+ u16 num_strides;
+ u16 actual_wq_head;
+ u8 log_stride_sz;
+ u8 umr_in_progress;
+ u8 umr_last_bulk;
+ u8 umr_completed;
+ u8 min_wqe_bulk;
+ u8 page_shift;
+ u8 pages_per_wqe;
+ u8 umr_wqebbs;
+ u8 mtts_per_wqe;
+ u8 umr_mode;
+ struct mlx5e_shampo_hd *shampo;
+ } mpwqe;
+ };
+ struct {
+ u16 headroom;
+ u32 frame0_sz;
+ u8 map_dir; /* dma map direction */
+ } buff;
+
+ struct device *pdev;
+ struct net_device *netdev;
+ struct mlx5e_rq_stats *stats;
+ struct mlx5e_cq cq;
+ struct mlx5e_cq_decomp cqd;
+ struct mlx5e_page_cache page_cache;
+ struct hwtstamp_config *tstamp;
+ struct mlx5_clock *clock;
+ struct mlx5e_icosq *icosq;
+ struct mlx5e_priv *priv;
+
+ struct mlx5e_hw_gro_data *hw_gro_data;
+
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe;
+ mlx5e_fp_post_rx_wqes post_wqes;
+ mlx5e_fp_dealloc_wqe dealloc_wqe;
+
+ unsigned long state;
+ int ix;
+ unsigned int hw_mtu;
+
+ struct dim dim; /* Dynamic Interrupt Moderation */
+
+ /* XDP */
+ struct bpf_prog __rcu *xdp_prog;
+ struct mlx5e_xdpsq *xdpsq;
+ DECLARE_BITMAP(flags, 8);
+ struct page_pool *page_pool;
+
+ /* AF_XDP zero-copy */
+ struct xsk_buff_pool *xsk_pool;
+
+ struct work_struct recover_work;
+
+ /* control */
+ struct mlx5_wq_ctrl wq_ctrl;
+ __be32 mkey_be;
+ u8 wq_type;
+ u32 rqn;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_channel *channel;
+ struct mlx5e_dma_info wqe_overflow;
+
+ /* XDP read-mostly */
+ struct xdp_rxq_info xdp_rxq;
+ cqe_ts_to_ns ptp_cyc2time;
+} ____cacheline_aligned_in_smp;
+
+enum mlx5e_channel_state {
+ MLX5E_CHANNEL_STATE_XSK,
+ MLX5E_CHANNEL_NUM_STATES
+};
+
+struct mlx5e_channel {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_xdpsq rq_xdpsq;
+ struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_icosq icosq; /* internal control operations */
+ struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
+ bool xdp;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+ u16 qos_sqs_size;
+ u8 num_tc;
+ u8 lag_port;
+
+ /* XDP_REDIRECT */
+ struct mlx5e_xdpsq xdpsq;
+
+ /* AF_XDP zero-copy */
+ struct mlx5e_rq xskrq;
+ struct mlx5e_xdpsq xsksq;
+
+ /* Async ICOSQ */
+ struct mlx5e_icosq async_icosq;
+ /* async_icosq can be accessed from any CPU - the spinlock protects it. */
+ spinlock_t async_icosq_lock;
+
+ /* data path - accessed per napi poll */
+ const struct cpumask *aff_mask;
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
+ int ix;
+ int cpu;
+ /* Sync between icosq recovery and XSK enable/disable. */
+ struct mutex icosq_recovery_lock;
+};
+
+struct mlx5e_ptp;
+
+struct mlx5e_channels {
+ struct mlx5e_channel **c;
+ struct mlx5e_ptp *ptp;
+ unsigned int num;
+ struct mlx5e_params params;
+};
+
+struct mlx5e_channel_stats {
+ struct mlx5e_ch_stats ch;
+ struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq_stats rq;
+ struct mlx5e_rq_stats xskrq;
+ struct mlx5e_xdpsq_stats rq_xdpsq;
+ struct mlx5e_xdpsq_stats xdpsq;
+ struct mlx5e_xdpsq_stats xsksq;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_ptp_stats {
+ struct mlx5e_ch_stats ch;
+ struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq_stats rq;
+} ____cacheline_aligned_in_smp;
+
+enum {
+ MLX5E_STATE_OPENED,
+ MLX5E_STATE_DESTROYING,
+ MLX5E_STATE_XDP_TX_ENABLED,
+ MLX5E_STATE_XDP_ACTIVE,
+ MLX5E_STATE_CHANNELS_ACTIVE,
+};
+
+struct mlx5e_modify_sq_param {
+ int curr_state;
+ int next_state;
+ int rl_update;
+ int rl_index;
+ bool qos_update;
+ u16 qos_queue_group_id;
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+struct mlx5e_hv_vhca_stats_agent {
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work work;
+ u16 delay;
+ void *buf;
+};
+#endif
+
+struct mlx5e_xsk {
+ /* XSK buffer pools are stored separately from channels,
+ * because we don't want to lose them when channels are
+ * recreated. The kernel also stores the buffer pools, but it
+ * doesn't distinguish between zero-copy and non-zero-copy
+ * UMEMs, so we rely on our own mechanism.
+ */
+ struct xsk_buff_pool **pools;
+ u16 refcnt;
+ bool ever_used;
+};
+
+/* Temporary storage for variables that are allocated when struct mlx5e_priv is
+ * initialized, and used where we can't allocate them because those functions
+ * must not fail. Use with care and make sure the same variable is not used
+ * simultaneously by multiple users.
+ */
+struct mlx5e_scratchpad {
+ cpumask_var_t cpumask;
+};
+
+struct mlx5e_trap;
+struct mlx5e_htb;
+
+struct mlx5e_priv {
+ /* priv data path fields - start */
+ struct mlx5e_selq selq;
+ struct mlx5e_txqsq **txq2sq;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ struct mlx5e_dcbx_dp dcbx_dp;
+#endif
+ /* priv data path fields - end */
+
+ u32 msglevel;
+ unsigned long state;
+ struct mutex state_lock; /* Protects Interface state */
+ struct mlx5e_rq drop_rq;
+
+ struct mlx5e_channels channels;
+ u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
+ struct mlx5e_rx_res *rx_res;
+ u32 *tx_rates;
+
+ struct mlx5e_flow_steering *fs;
+
+ struct workqueue_struct *wq;
+ struct work_struct update_carrier_work;
+ struct work_struct set_rx_mode_work;
+ struct work_struct tx_timeout_work;
+ struct work_struct update_stats_work;
+ struct work_struct monitor_counters_work;
+ struct mlx5_nb monitor_counters_nb;
+
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_trap *en_trap;
+ struct mlx5e_stats stats;
+ struct mlx5e_channel_stats **channel_stats;
+ struct mlx5e_channel_stats trap_stats;
+ struct mlx5e_ptp_stats ptp_stats;
+ struct mlx5e_sq_stats **htb_qos_sq_stats;
+ u16 htb_max_qos_sqs;
+ u16 stats_nch;
+ u16 max_nch;
+ u8 max_opened_tc;
+ bool tx_ptp_opened;
+ bool rx_ptp_opened;
+ struct hwtstamp_config tstamp;
+ u16 q_counter;
+ u16 drop_rq_q_counter;
+ struct notifier_block events_nb;
+ struct notifier_block blocking_events_nb;
+
+ struct udp_tunnel_nic_info nic_info;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ struct mlx5e_dcbx dcbx;
+#endif
+
+ const struct mlx5e_profile *profile;
+ void *ppriv;
+#ifdef CONFIG_MLX5_EN_MACSEC
+ struct mlx5e_macsec *macsec;
+#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_ipsec *ipsec;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_tls *tls;
+#endif
+ struct devlink_health_reporter *tx_reporter;
+ struct devlink_health_reporter *rx_reporter;
+ struct mlx5e_xsk xsk;
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+ struct mlx5e_hv_vhca_stats_agent stats_agent;
+#endif
+ struct mlx5e_scratchpad scratchpad;
+ struct mlx5e_htb *htb;
+ struct mlx5e_mqprio_rl *mqprio_rl;
+};
+
+struct mlx5e_rx_handlers {
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe;
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo;
+};
+
+extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic;
+
+enum mlx5e_profile_feature {
+ MLX5E_PROFILE_FEATURE_PTP_RX,
+ MLX5E_PROFILE_FEATURE_PTP_TX,
+ MLX5E_PROFILE_FEATURE_QOS_HTB,
+ MLX5E_PROFILE_FEATURE_FS_VLAN,
+ MLX5E_PROFILE_FEATURE_FS_TC,
+};
+
+struct mlx5e_profile {
+ int (*init)(struct mlx5_core_dev *mdev,
+ struct net_device *netdev);
+ void (*cleanup)(struct mlx5e_priv *priv);
+ int (*init_rx)(struct mlx5e_priv *priv);
+ void (*cleanup_rx)(struct mlx5e_priv *priv);
+ int (*init_tx)(struct mlx5e_priv *priv);
+ void (*cleanup_tx)(struct mlx5e_priv *priv);
+ void (*enable)(struct mlx5e_priv *priv);
+ void (*disable)(struct mlx5e_priv *priv);
+ int (*update_rx)(struct mlx5e_priv *priv);
+ void (*update_stats)(struct mlx5e_priv *priv);
+ void (*update_carrier)(struct mlx5e_priv *priv);
+ int (*max_nch_limit)(struct mlx5_core_dev *mdev);
+ unsigned int (*stats_grps_num)(struct mlx5e_priv *priv);
+ mlx5e_stats_grp_t *stats_grps;
+ const struct mlx5e_rx_handlers *rx_handlers;
+ int max_tc;
+ u32 features;
+};
+
+#define mlx5e_profile_feature_cap(profile, feature) \
+ ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))
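+
+/* Example (illustrative expansion only): a profile that sets
+ * features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) satisfies
+ * mlx5e_profile_feature_cap(profile, PTP_RX), which expands to
+ * ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_PTP_RX)).
+ */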
+
+void mlx5e_build_ptys2ethtool_map(void);
+
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
+void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
+void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
+
+int mlx5e_self_test_num(struct mlx5e_priv *priv);
+int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data);
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf);
+void mlx5e_set_rx_mode_work(struct work_struct *work);
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter);
+
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
+ u16 vid);
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
+ u16 vid);
+void mlx5e_timestamp_init(struct mlx5e_priv *priv);
+
+struct mlx5e_xsk_param;
+
+struct mlx5e_rq_param;
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+ struct mlx5e_xsk_param *xsk, int node,
+ struct mlx5e_rq *rq);
+#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
+void mlx5e_close_rq(struct mlx5e_rq *rq);
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
+void mlx5e_destroy_rq(struct mlx5e_rq *rq);
+
+struct mlx5e_sq_param;
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
+ struct mlx5e_xdpsq *sq, bool is_redirect);
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
+
+struct mlx5e_create_cq_param {
+ struct napi_struct *napi;
+ struct mlx5e_ch_stats *ch_stats;
+ int node;
+ int ix;
+};
+
+struct mlx5e_cq_param;
+int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
+ struct mlx5e_cq *cq);
+void mlx5e_close_cq(struct mlx5e_cq *cq);
+
+int mlx5e_open_locked(struct net_device *netdev);
+int mlx5e_close_locked(struct net_device *netdev);
+
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c);
+void mlx5e_trigger_napi_sched(struct napi_struct *napi);
+
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs);
+void mlx5e_close_channels(struct mlx5e_channels *chs);
+
+/* Function pointer to be used to modify HW or kernel settings while
+ * switching channels
+ */
+typedef int (*mlx5e_fp_preactivate)(struct mlx5e_priv *priv, void *context);
+#define MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(fn) \
+int fn##_ctx(struct mlx5e_priv *priv, void *context) \
+{ \
+ return fn(priv); \
+}
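+
+/* Sketch of a hypothetical call site (based only on the declarations in this
+ * header): the macro above generates fn_ctx(priv, context), which drops the
+ * unused context, so a wrapped helper such as mlx5e_set_dev_port_mtu_ctx()
+ * can be passed as the preactivate callback:
+ *
+ *	err = mlx5e_safe_switch_params(priv, &new_params,
+ *				       mlx5e_set_dev_port_mtu_ctx, NULL, true);
+ */
+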
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
+int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
+ struct mlx5e_params *new_params,
+ mlx5e_fp_preactivate preactivate,
+ void *context, bool reset);
+int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
+int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
+void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
+void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state);
+void mlx5e_activate_rq(struct mlx5e_rq *rq);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
+void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
+
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p);
+int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
+ struct mlx5e_params *params, struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id,
+ struct mlx5e_sq_stats *sq_stats);
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_free_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_tx_disable_queue(struct netdev_queue *txq);
+int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa);
+void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq);
+struct mlx5e_create_sq_param;
+int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u16 qos_queue_group_id,
+ u32 *sqn);
+void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
+void mlx5e_close_txqsq(struct mlx5e_txqsq *sq);
+
+static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_ETH(mdev, swp) &&
+ MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso);
+}
+
+extern const struct ethtool_ops mlx5e_ethtool_ops;
+
+int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+ bool enable_mc_lb);
+void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc);
+
+/* common netdev helpers */
+void mlx5e_create_q_counters(struct mlx5e_priv *priv);
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv);
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq);
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
+
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
+void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
+
+int mlx5e_create_tises(struct mlx5e_priv *priv);
+void mlx5e_destroy_tises(struct mlx5e_priv *priv);
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
+void mlx5e_update_carrier(struct mlx5e_priv *priv);
+int mlx5e_close(struct net_device *netdev);
+int mlx5e_open(struct net_device *netdev);
+
+void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
+
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
+int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context);
+int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+ mlx5e_fp_preactivate preactivate);
+void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv);
+
+/* ethtool helpers */
+void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
+ struct ethtool_drvinfo *drvinfo);
+void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv,
+ uint32_t stringset, uint8_t *data);
+int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset);
+void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
+ struct ethtool_stats *stats, u64 *data);
+void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param);
+int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param);
+void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch);
+int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch);
+int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal);
+int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack);
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings);
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings);
+int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key,
+ const u8 hfunc);
+int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+ u32 *rule_locs);
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
+u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
+u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
+int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
+ struct ethtool_ts_info *info);
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash);
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
+
+/* mlx5e generic netdev management API */
+static inline bool
+mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev)
+{
+ return !is_kdump_kernel() &&
+ MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe);
+}
+
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev);
+int mlx5e_priv_init(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev,
+ struct mlx5_core_dev *mdev);
+void mlx5e_priv_cleanup(struct mlx5e_priv *priv);
+struct net_device *
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile);
+int mlx5e_attach_netdev(struct mlx5e_priv *priv);
+void mlx5e_detach_netdev(struct mlx5e_priv *priv);
+void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
+int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *new_profile, void *new_ppriv);
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
+void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
+void mlx5e_rx_dim_work(struct work_struct *work);
+void mlx5e_tx_dim_work(struct work_struct *work);
+
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features);
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features);
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
+int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi);
+int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats);
+#endif
+#endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
new file mode 100644
index 000000000..48581ea3a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+ return chs->num;
+}
+
+static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix)
+{
+ WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs));
+ return chs->c[ix];
+}
+
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ *rqn = c->rq.rqn;
+}
+
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));
+
+ *rqn = c->xskrq.rqn;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+ struct mlx5e_ptp *c = chs->ptp;
+
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+ return false;
+
+ *rqn = c->rq.rqn;
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
new file mode 100644
index 000000000..637ca90da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
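+
+/* Sketch of a typical caller (hypothetical, based only on the prototypes
+ * above), e.g. when collecting the regular RQNs of all channels:
+ *
+ *	for (ix = 0; ix < mlx5e_channels_get_num(chs); ix++) {
+ *		u32 rqn;
+ *
+ *		mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
+ *		... use rqn ...
+ *	}
+ */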
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
new file mode 100644
index 000000000..b59aee75d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5E_DCBNL_H__
+#define __MLX5E_DCBNL_H__
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+
+#define MLX5E_MAX_PRIORITY (8)
+
+struct mlx5e_cee_config {
+ /* bw pct for priority group */
+ u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
+ u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+ bool pfc_setting[CEE_DCBX_MAX_PRIO];
+ bool pfc_enable;
+};
+
+struct mlx5e_dcbx {
+ enum mlx5_dcbx_oper_mode mode;
+ struct mlx5e_cee_config cee_cfg; /* pending configuration */
+ u8 dscp_app_cnt;
+
+ /* The only setting that cannot be read from FW */
+ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
+ u8 cap;
+
+ /* Buffer configuration */
+ bool manual_buffer;
+ u32 cable_len;
+ u32 xoff;
+ u16 port_buff_cell_sz;
+};
+
+#define MLX5E_MAX_DSCP (64)
+
+struct mlx5e_dcbx_dp {
+ u8 dscp2prio[MLX5E_MAX_DSCP];
+ u8 trust_state;
+};
+
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
+#else
+static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
+static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_DCBNL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
new file mode 100644
index 000000000..b69f9d10c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "en/devlink.h"
+#include "eswitch.h"
+
+static void
+mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+ u64 parent_id;
+
+ parent_id = mlx5_query_nic_system_image_guid(dev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
+
+int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
+{
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
+ struct devlink_port_attrs attrs = {};
+ struct netdev_phys_item_id ppid = {};
+ struct devlink_port *dl_port;
+ unsigned int dl_port_index;
+ int ret;
+
+ if (mlx5_core_is_pf(priv->mdev)) {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = mlx5_get_dev_index(priv->mdev);
+ if (MLX5_ESWITCH_MANAGER(priv->mdev)) {
+ mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid);
+ memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+ attrs.switch_id.id_len = ppid.id_len;
+ }
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev,
+ MLX5_VPORT_UPLINK);
+ } else {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0);
+ }
+
+ dl_port = mlx5e_devlink_get_dl_port(priv);
+ memset(dl_port, 0, sizeof(*dl_port));
+ devlink_port_attrs_set(dl_port, &attrs);
+
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ ret = devl_port_register(devlink, dl_port, dl_port_index);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+
+ return ret;
+}
+
+void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+ devlink_port_type_eth_set(dl_port, priv->netdev);
+}
+
+void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
+
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ devl_port_unregister(dl_port);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+}
+
+struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (!netif_device_present(dev))
+ return NULL;
+
+ return mlx5e_devlink_get_dl_port(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
new file mode 100644
index 000000000..10b50feb9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_DEVLINK_H
+#define __MLX5E_EN_DEVLINK_H
+
+#include <net/devlink.h>
+#include "en.h"
+
+int mlx5e_devlink_port_register(struct mlx5e_priv *priv);
+void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv);
+void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv);
+struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev);
+
+static inline struct devlink_port *
+mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv)
+{
+ return &priv->mdev->mlx5e_res.dl_port;
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
new file mode 100644
index 000000000..bf2741eb7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5E_FLOW_STEER_H__
+#define __MLX5E_FLOW_STEER_H__
+
+#include "mod_hdr.h"
+#include "lib/fs_ttc.h"
+
+struct mlx5e_post_act;
+struct mlx5e_tc_table;
+
+enum {
+ MLX5E_TC_FT_LEVEL = 0,
+ MLX5E_TC_TTC_FT_LEVEL,
+ MLX5E_TC_MISS_LEVEL,
+};
+
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
+};
+
+struct mlx5e_flow_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+};
+
+struct mlx5e_l2_rule {
+ u8 addr[ETH_ALEN + 2];
+ struct mlx5_flow_handle *rule;
+};
+
+#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct mlx5e_promisc_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *rule;
+};
+
+/* Forward declaration and APIs to get private fields of vlan_table */
+struct mlx5e_vlan_table;
+unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan);
+struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan);
+
+struct mlx5e_l2_table {
+ struct mlx5e_flow_table ft;
+ struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct mlx5e_l2_rule broadcast;
+ struct mlx5e_l2_rule allmulti;
+ struct mlx5_flow_handle *trap_rule;
+ bool broadcast_enabled;
+ bool allmulti_enabled;
+ bool promisc_enabled;
+};
+
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
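+
+/* For reference: MLX5_HASH_IP spreads flows over the IP address pair only,
+ * while MLX5_HASH_IP_L4PORTS uses the full 4-tuple {src IP, dst IP, L4 source
+ * port, L4 destination port}; MLX5_HASH_IP_IPSEC_SPI replaces the ports with
+ * the IPsec SPI.
+ */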
+
+/* NIC prio FTS */
+enum {
+ MLX5E_PROMISC_FT_LEVEL,
+ MLX5E_VLAN_FT_LEVEL,
+ MLX5E_L2_FT_LEVEL,
+ MLX5E_TTC_FT_LEVEL,
+ MLX5E_INNER_TTC_FT_LEVEL,
+ MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+ MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#ifdef CONFIG_MLX5_EN_TLS
+ MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#endif
+#ifdef CONFIG_MLX5_EN_ARFS
+ MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+ MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
+#endif
+};
+
+struct mlx5e_flow_steering;
+struct mlx5e_rx_res;
+
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables;
+
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple);
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple);
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs);
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs);
+int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id);
+#else
+static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
+{ return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+#endif
+
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp;
+#endif
+
+struct mlx5e_profile;
+struct mlx5e_fs_udp;
+struct mlx5e_fs_any;
+struct mlx5e_ptp_fs;
+
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params, bool tunnel);
+
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs);
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res);
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev);
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile);
+
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy);
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs);
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc);
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs);
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress);
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress);
+#ifdef CONFIG_MLX5_EN_RXNFC
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs);
+#endif
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner);
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner);
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs);
+#endif
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs);
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any);
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp);
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp);
+#endif
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy);
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable);
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs);
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs);
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+
+#define fs_err(fs, fmt, ...) \
+ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_dbg(fs, fmt, ...) \
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn(fs, fmt, ...) \
+ mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn_once(fs, fmt, ...) \
+ mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#endif /* __MLX5E_FLOW_STEER_H__ */
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
new file mode 100644
index 000000000..9e276fd3c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5E_FS_ETHTOOL_H__
+#define __MLX5E_FS_ETHTOOL_H__
+
+struct mlx5e_priv;
+struct mlx5e_ethtool_steering;
+#ifdef CONFIG_MLX5_EN_RXNFC
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool);
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool);
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs);
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{ return 0; }
+static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { }
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
new file mode 100644
index 000000000..671adbad0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "en/fs_tt_redirect.h"
+#include "fs_core.h"
+#include "mlx5_core.h"
+
+enum fs_udp_type {
+ FS_IPV4_UDP,
+ FS_IPV6_UDP,
+ FS_UDP_NUM_TYPES,
+};
+
+struct mlx5e_fs_udp {
+ struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES];
+ struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES];
+ int ref_cnt;
+};
+
+struct mlx5e_fs_any {
+ struct mlx5e_flow_table table;
+ struct mlx5_flow_handle *default_rule;
+ int ref_cnt;
+};
+
+static char *fs_udp_type2str(enum fs_udp_type i)
+{
+ switch (i) {
+ case FS_IPV4_UDP:
+ return "UDP v4";
+ default: /* FS_IPV6_UDP */
+ return "UDP v6";
+ }
+}
+
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
+{
+ switch (i) {
+ case FS_IPV4_UDP:
+ return MLX5_TT_IPV4_UDP;
+ default: /* FS_IPV6_UDP */
+ return MLX5_TT_IPV6_UDP;
+ }
+}
+
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
+{
+ switch (i) {
+ case MLX5_TT_IPV4_UDP:
+ return FS_IPV4_UDP;
+ case MLX5_TT_IPV6_UDP:
+ return FS_IPV6_UDP;
+ default:
+ return FS_UDP_NUM_TYPES;
+ }
+}
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type,
+ u16 udp_dport)
+{
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+ type == FS_IPV4_UDP ? 4 : 6);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport);
+}
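+
+/* Example of the match built above (values illustrative only): for
+ * type == FS_IPV4_UDP and udp_dport == 319 (the PTP event port), the spec
+ * matches outer ip_protocol == IPPROTO_UDP, ip_version == 4 and
+ * udp_dport == 319.
+ */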
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5_traffic_types ttc_type,
+ u32 tir_num, u16 d_port)
+{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ enum fs_udp_type type = tt2fs_udp(ttc_type);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (type == FS_UDP_NUM_TYPES)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ft = fs_udp->tables[type].t;
+
+ fs_udp_set_dport_flow(spec, type, d_port);
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add %s rule failed, err %d\n",
+ __func__, fs_udp_type2str(type), err);
+ }
+ return rule;
+}
+
+static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ struct mlx5e_flow_table *fs_udp_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ fs_udp_t = &fs_udp->tables[type];
+
+ dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type));
+ rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n",
+ __func__, type, err);
+ return err;
+ }
+
+ fs_udp->default_rules[type] = rule;
+ return 0;
+}
+
+#define MLX5E_FS_UDP_NUM_GROUPS (2)
+#define MLX5E_FS_UDP_GROUP1_SIZE (BIT(16))
+#define MLX5E_FS_UDP_GROUP2_SIZE (BIT(0))
+#define MLX5E_FS_UDP_TABLE_SIZE (MLX5E_FS_UDP_GROUP1_SIZE +\
+ MLX5E_FS_UDP_GROUP2_SIZE)
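+/* Sizing of the defines above: group 1 holds one entry per possible UDP
+ * destination port (BIT(16) = 65536 flow entries), group 2 holds the single
+ * default rule (BIT(0) = 1), for a table of 65537 entries in total.
+ */
+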
+static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ ft->g = NULL;
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
+
+ switch (type) {
+ case FS_IPV4_UDP:
+ case FS_IPV6_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+	/* Match on UDP protocol, IPv4/IPv6 and dport */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_UDP_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_UDP_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+out:
+ kvfree(in);
+
+ return err;
+}
+
+static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
+ int err;
+
+ ft = &fs_udp->tables[type];
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
+ ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n",
+ fs_udp_type2str(type), ft->t->id, ft->t->level);
+
+ err = fs_udp_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = fs_udp_add_default_rule(fs, type);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i)
+{
+ if (IS_ERR_OR_NULL(fs_udp->tables[i].t))
+ return;
+
+ mlx5_del_flow_rules(fs_udp->default_rules[i]);
+ mlx5e_destroy_flow_table(&fs_udp->tables[i]);
+ fs_udp->tables[i].t = NULL;
+}
+
+static int fs_udp_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err, i;
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i));
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int fs_udp_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
+ struct mlx5_flow_destination dest = {};
+ int err, i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ dest.ft = udp->tables[i].t;
+
+		/* Modify ttc rules destination to point to the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest);
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ int i;
+
+ if (!fs_udp)
+ return;
+
+ if (--fs_udp->ref_cnt)
+ return;
+
+ fs_udp_disable(fs);
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++)
+ fs_udp_destroy_table(fs_udp, i);
+
+ kfree(fs_udp);
+ mlx5e_fs_set_udp(fs, NULL);
+}
+
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
+ int i, err;
+
+ if (udp) {
+ udp->ref_cnt++;
+ return 0;
+ }
+
+ udp = kzalloc(sizeof(*udp), GFP_KERNEL);
+ if (!udp)
+ return -ENOMEM;
+ mlx5e_fs_set_udp(fs, udp);
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ err = fs_udp_create_table(fs, i);
+ if (err)
+ goto err_destroy_tables;
+ }
+
+ err = fs_udp_enable(fs);
+ if (err)
+ goto err_destroy_tables;
+
+ udp->ref_cnt = 1;
+
+ return 0;
+
+err_destroy_tables:
+ while (--i >= 0)
+ fs_udp_destroy_table(udp, i);
+
+ kfree(udp);
+ mlx5e_fs_set_udp(fs, NULL);
+ return err;
+}
+
+static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type)
+{
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type);
+}
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
+ u32 tir_num, u16 ether_type)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ft = fs_any->table.t;
+
+ fs_any_set_ethertype_flow(spec, ether_type);
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add ANY rule failed, err %d\n",
+ __func__, err);
+ }
+ return rule;
+}
+
+static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *fs_any_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ fs_any_t = &fs_any->table;
+ dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY);
+ rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n",
+ __func__, err);
+ return err;
+ }
+
+ fs_any->default_rule = rule;
+ return 0;
+}
+
+#define MLX5E_FS_ANY_NUM_GROUPS (2)
+#define MLX5E_FS_ANY_GROUP1_SIZE (BIT(16))
+#define MLX5E_FS_ANY_GROUP2_SIZE (BIT(0))
+#define MLX5E_FS_ANY_TABLE_SIZE (MLX5E_FS_ANY_GROUP1_SIZE +\
+ MLX5E_FS_ANY_GROUP2_SIZE)
+
+static int fs_any_create_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+	ft->g = kcalloc(MLX5E_FS_ANY_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ ft->g = NULL;
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ /* Match on ethertype */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_ANY_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_ANY_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static int fs_any_create_table(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *ft = &fs_any->table;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+	ft_attr.max_fte = MLX5E_FS_ANY_TABLE_SIZE;
+ ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n",
+ ft->t->id, ft->t->level);
+
+ err = fs_any_create_groups(ft);
+ if (err)
+ goto err;
+
+ err = fs_any_add_default_rule(fs);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static int fs_any_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err;
+
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY);
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
+ return err;
+ }
+ return 0;
+}
+
+static int fs_any_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs);
+ struct mlx5_flow_destination dest = {};
+ int err;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = any->table.t;
+
+	/* Modify ttc rules destination to point to the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest);
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
+ return err;
+ }
+ return 0;
+}
+
+static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any)
+{
+ if (IS_ERR_OR_NULL(fs_any->table.t))
+ return;
+
+ mlx5_del_flow_rules(fs_any->default_rule);
+ mlx5e_destroy_flow_table(&fs_any->table);
+ fs_any->table.t = NULL;
+}
+
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+
+ if (!fs_any)
+ return;
+
+ if (--fs_any->ref_cnt)
+ return;
+
+ fs_any_disable(fs);
+
+ fs_any_destroy_table(fs_any);
+
+ kfree(fs_any);
+ mlx5e_fs_set_any(fs, NULL);
+}
+
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ int err;
+
+ if (fs_any) {
+ fs_any->ref_cnt++;
+ return 0;
+ }
+
+ fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL);
+ if (!fs_any)
+ return -ENOMEM;
+ mlx5e_fs_set_any(fs, fs_any);
+
+ err = fs_any_create_table(fs);
+ if (err)
+ goto err_free_any;
+
+ err = fs_any_enable(fs);
+ if (err)
+ goto err_destroy_table;
+
+ fs_any->ref_cnt = 1;
+
+ return 0;
+
+err_destroy_table:
+ fs_any_destroy_table(fs_any);
+err_free_any:
+ mlx5e_fs_set_any(fs, NULL);
+ kfree(fs_any);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
new file mode 100644
index 000000000..5780fd7ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5E_FS_TT_REDIRECT_H__
+#define __MLX5E_FS_TT_REDIRECT_H__
+
+#include "en/fs.h"
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
+
+/* UDP traffic type redirect */
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5_traffic_types ttc_type,
+ u32 tir_num, u16 d_port);
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs);
+
+/* ANY traffic type redirect */
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
+ u32 tir_num, u16 ether_type);
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
new file mode 100644
index 000000000..6f4e6c34b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+ u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
+ u8 hw_status;
+ void *cqc;
+ int err;
+
+ err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
+ if (err)
+ return err;
+
+ cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+ hw_status = MLX5_GET(cqc, cqc, status);
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+ u8 cq_log_stride;
+ u32 cq_sz;
+ int err;
+
+ cq_sz = mlx5_cqwq_get_size(&cq->wq);
+ cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
+{
+ mlx5e_reporter_tx_create(priv);
+ mlx5e_reporter_rx_create(priv);
+}
+
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
+{
+ mlx5e_reporter_rx_destroy(priv);
+ mlx5e_reporter_tx_destroy(priv);
+}
+
+void mlx5e_health_channels_update(struct mlx5e_priv *priv)
+{
+ if (priv->tx_reporter)
+ devlink_health_reporter_state_update(priv->tx_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+ if (priv->rx_reporter)
+ devlink_health_reporter_state_update(priv->rx_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+}
+
+int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
+{
+ struct mlx5e_modify_sq_param msp = {};
+ int err;
+
+ msp.curr_state = MLX5_SQC_STATE_ERR;
+ msp.next_state = MLX5_SQC_STATE_RST;
+
+ err = mlx5e_modify_sq(mdev, sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
+ return err;
+ }
+
+ memset(&msp, 0, sizeof(msp));
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+
+ err = mlx5e_modify_sq(mdev, sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
+{
+ int err = 0;
+
+ rtnl_lock();
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto out;
+
+ err = mlx5e_safe_reopen_channels(priv);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ rtnl_unlock();
+
+ return err;
+}
+
+int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
+ struct mlx5e_ch_stats *stats)
+{
+ u32 eqe_count;
+
+ netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+ eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+ eqe_count = mlx5_eq_poll_irq_disabled(eq);
+ if (!eqe_count)
+ return -EIO;
+
+ netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
+ eqe_count, eq->core.eqn);
+
+ stats->eq_rearm++;
+ return 0;
+}
+
+int mlx5e_health_report(struct mlx5e_priv *priv,
+ struct devlink_health_reporter *reporter, char *err_str,
+ struct mlx5e_err_ctx *err_ctx)
+{
+ netdev_err(priv->netdev, "%s\n", err_str);
+
+ if (!reporter)
+ return err_ctx->recover(err_ctx->ctx);
+
+ return devlink_health_report(reporter, err_str, err_ctx);
+}
+
+#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
+static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
+ const void *value, u32 value_len)
+{
+ u32 data_size;
+ int err = 0;
+ u32 offset;
+
+ for (offset = 0; offset < value_len; offset += data_size) {
+ data_size = value_len - offset;
+ if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
+ data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
+ err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
+ if (err)
+ break;
+ }
+ return err;
+}
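+
+/* Worked example (numbers only, not a real trace): with
+ * MLX5_HEALTH_DEVLINK_MAX_SIZE of 1024, a 4096-byte dump is emitted as four
+ * 1024-byte binary chunks, while a 1500-byte dump becomes one 1024-byte chunk
+ * followed by one 476-byte chunk.
+ */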
+
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct page *page;
+ int cmd_err, err;
+ int end_err;
+ int size;
+
+ if (IS_ERR_OR_NULL(mdev->rsc_dump))
+ return -EOPNOTSUPP;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
+ if (err)
+ goto free_page;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+
+ err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
+ if (err)
+ goto destroy_cmd;
+
+ } while (cmd_err > 0);
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+ end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
+ if (end_err)
+ err = end_err;
+free_page:
+ __free_page(page);
+ return err;
+}
+
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ int queue_idx, char *lbl)
+{
+ struct mlx5_rsc_key key = {};
+ int err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = queue_idx;
+ key.size = PAGE_SIZE;
+ key.num_of_obj1 = 1;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
+ if (err)
+ return err;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
new file mode 100644
index 000000000..0107e4e73
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_EN_HEALTH_H
+#define __MLX5E_EN_HEALTH_H
+
+#include "en.h"
+#include "diag/rsc_dump.h"
+
+static inline bool cqe_syndrome_needs_recover(u8 syndrome)
+{
+ return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+ syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+ syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+}
+
+void mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
+
+int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg);
+int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
+int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg);
+
+void mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
+
+#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
+
+struct mlx5e_err_ctx {
+ int (*recover)(void *ctx);
+ int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
+ void *ctx;
+};
+
+int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn);
+int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
+ struct mlx5e_ch_stats *stats);
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
+int mlx5e_health_report(struct mlx5e_priv *priv,
+ struct devlink_health_reporter *reporter, char *err_str,
+ struct mlx5e_err_ctx *err_ctx);
+void mlx5e_health_create_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_channels_update(struct mlx5e_priv *priv);
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+ struct devlink_fmsg *fmsg);
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ int queue_idx, char *lbl);
+#endif
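
As an aside on the reporter plumbing declared above, here is a minimal, hypothetical sketch of how an error could be packaged into a struct mlx5e_err_ctx and handed to mlx5e_health_report(). The function names, the reporter pointer and the recover body are editorial placeholders, not taken from this file.

static int example_sq_recover(void *ctx)
{
        struct mlx5e_txqsq *sq = ctx;

        /* Placeholder: a real callback would reset and restart the SQ. */
        return sq ? 0 : -EINVAL;
}

static void example_report_sq_error(struct mlx5e_priv *priv,
                                    struct devlink_health_reporter *reporter,
                                    struct mlx5e_txqsq *sq)
{
        struct mlx5e_err_ctx err_ctx = {
                .recover = example_sq_recover,
                .ctx     = sq,
        };

        /* err_str ends up in the devlink health log; .dump is optional and left NULL. */
        mlx5e_health_report(priv, reporter, "example: ERR CQE on SQ", &err_ctx);
}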
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
new file mode 100644
index 000000000..09d441ecb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/pkt_cls.h>
+#include "htb.h"
+#include "en.h"
+#include "../qos.h"
+
+struct mlx5e_qos_node {
+ struct hlist_node hnode;
+ struct mlx5e_qos_node *parent;
+ u64 rate;
+ u32 bw_share;
+ u32 max_average_bw;
+ u32 hw_id;
+ u32 classid; /* 16-bit, except root. */
+ u16 qid;
+};
+
+struct mlx5e_htb {
+ DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
+ DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ struct mlx5e_selq *selq;
+};
+
+#define MLX5E_QOS_QID_INNER 0xffff
+#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
+
+/* Software representation of the QoS tree */
+
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt, err;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode) {
+ if (node->qid == MLX5E_QOS_QID_INNER)
+ continue;
+ err = callback(data, node->qid, node->hw_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb)
+{
+ int last;
+
+ last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev));
+ return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1;
+}
+
+static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb)
+{
+ int size = mlx5e_qos_max_leaf_nodes(htb->mdev);
+ struct mlx5e_priv *priv = htb->priv;
+ int res;
+
+ WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
+ res = find_first_zero_bit(htb->qos_used_qids, size);
+
+ return res == size ? -ENOSPC : res;
+}
+
+static struct mlx5e_qos_node *
+mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid,
+ struct mlx5e_qos_node *parent)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->parent = parent;
+
+ node->qid = qid;
+ __set_bit(qid, htb->qos_used_qids);
+
+ node->classid = classid;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, classid);
+
+ mlx5e_update_tx_netdev_queues(htb->priv);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->qid = MLX5E_QOS_QID_INNER;
+ node->classid = MLX5E_HTB_CLASSID_ROOT;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node)
+{
+ hash_del_rcu(&node->hnode);
+ if (node->qid != MLX5E_QOS_QID_INNER) {
+ __clear_bit(node->qid, htb->qos_used_qids);
+ mlx5e_update_tx_netdev_queues(htb->priv);
+ }
+ /* Make sure this qid is no longer selected by mlx5e_select_queue, so
+ * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue.
+ */
+ synchronize_net();
+ kfree(node);
+}
+
+/* TX datapath API */
+
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid)
+{
+ struct mlx5e_qos_node *node;
+ u16 qid;
+ int res;
+
+ rcu_read_lock();
+
+ node = mlx5e_htb_node_find_rcu(htb, classid);
+ if (!node) {
+ res = -ENOENT;
+ goto out;
+ }
+ qid = READ_ONCE(node->qid);
+ if (qid == MLX5E_QOS_QID_INNER) {
+ res = -EINVAL;
+ goto out;
+ }
+ res = mlx5e_qid_from_qos(&htb->priv->channels, qid);
+
+out:
+ rcu_read_unlock();
+ return res;
+}
+
+/* HTB TC handlers */
+
+static int
+mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
+
+ mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls);
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ err = mlx5e_qos_alloc_queues(priv, &priv->channels);
+ if (err)
+ goto err_cancel_selq;
+ }
+
+ root = mlx5e_htb_node_create_root(htb);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto err_free_queues;
+ }
+
+ err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
+ goto err_sw_node_delete;
+ }
+
+ mlx5e_selq_apply(htb->selq);
+
+ return 0;
+
+err_sw_node_delete:
+ mlx5e_htb_node_delete(htb, root);
+
+err_free_queues:
+ if (opened)
+ mlx5e_qos_close_all_queues(&priv->channels);
+err_cancel_selq:
+ mlx5e_selq_cancel(htb->selq);
+ return err;
+}
+
+static int mlx5e_htb_root_del(struct mlx5e_htb *htb)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_DESTROY\n");
+
+ /* Wait until real_num_tx_queues is updated for mlx5e_select_queue,
+ * so that we can safely switch to its non-HTB non-PTP fastpath.
+ */
+ synchronize_net();
+
+ mlx5e_selq_prepare_htb(htb->selq, 0, 0);
+ mlx5e_selq_apply(htb->selq);
+
+ root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT);
+ if (!root) {
+ qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n");
+ return -ENOENT;
+ }
+ err = mlx5_qos_destroy_node(htb->mdev, root->hw_id);
+ if (err)
+ qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n",
+ root->hw_id, err);
+ mlx5e_htb_node_delete(htb, root);
+
+ mlx5e_qos_deactivate_all_queues(&priv->channels);
+ mlx5e_qos_close_all_queues(&priv->channels);
+
+ return err;
+}
+
+static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate,
+ struct mlx5e_qos_node *parent, u32 *bw_share)
+{
+ u64 share = 0;
+
+ while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
+ parent = parent->parent;
+
+ if (parent->max_average_bw)
+ share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
+ parent->max_average_bw);
+ else
+ share = 101;
+
+ *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
+
+ qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
+ rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
+
+ return 0;
+}
+
+static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw)
+{
+ /* Hardware treats 0 as "unlimited", set at least 1. */
+ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
+}
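
A worked example of the two conversions above may help; it is editorial and assumes BYTES_IN_MBIT is 125000 (one megabit expressed in bytes), as defined in qos.h.

/*
 * rate = 12,500,000 B/s (100 Mbit/s), nearest limited ancestor ceil = 200 Mbit/s:
 *   share = (12500000 * 100 / 125000) / 200 = 10000 / 200 = 50  ->  bw_share = 50
 * A rate below 1% of that ceil yields share == 0, which is clamped to 1, and a
 * rate above the ceil yields share > 100, which is mapped to bw_share = 0.
 * ceil = 25,000,000 B/s:
 *   max_average_bw = 25000000 / 125000 = 200 (Mbit/s); a ceil below 1 Mbit/s is
 *   rounded up to 1, since the hardware treats 0 as "unlimited".
 */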
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ int qid;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
+ classid, parent_classid, rate, ceil);
+
+ qid = mlx5e_htb_find_unused_qos_qid(htb);
+ if (qid < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Maximum number of leaf classes reached.");
+ return qid;
+ }
+
+ parent = mlx5e_htb_node_find(htb, parent_classid);
+ if (!parent)
+ return -EINVAL;
+
+ node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ mlx5e_htb_node_delete(htb, node);
+ return err;
+ }
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ return mlx5e_qid_from_qos(&priv->channels, node->qid);
+}
+
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *child;
+ struct mlx5e_priv *priv = htb->priv;
+ int err, tmp_err;
+ u32 new_hw_id;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
+ classid, child_classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
+ qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ /* Intentionally reuse the qid for the upcoming first child. */
+ child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto err_destroy_hw_node;
+ }
+
+ child->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share,
+ child->max_average_bw, &child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ goto err_delete_sw_node;
+ }
+
+ /* No fail point. */
+
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, child->qid, child->hw_id);
+ }
+ }
+
+ return 0;
+
+err_delete_sw_node:
+ child->qid = MLX5E_QOS_QID_INNER;
+ mlx5e_htb_node_delete(htb, child);
+
+err_destroy_hw_node:
+ tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id);
+ if (tmp_err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
+ new_hw_id, classid, tmp_err);
+ return err;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode)
+ if (node->qid == qid)
+ break;
+
+ return node;
+}
+
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *node;
+ struct netdev_queue *txq;
+ u16 qid, moved_qid;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
+
+ node = mlx5e_htb_node_find(htb, *classid);
+ if (!node)
+ return -ENOENT;
+
+ /* Store qid for reuse. */
+ qid = node->qid;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, qid));
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, *classid, err);
+
+ mlx5e_htb_node_delete(htb, node);
+
+ moved_qid = mlx5e_htb_cur_leaf_nodes(htb);
+
+ if (moved_qid == 0) {
+ /* The last QoS SQ was just destroyed. */
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+ moved_qid--;
+
+ if (moved_qid < qid) {
+ /* The highest QoS SQ was just destroyed. */
+		WARN(moved_qid != qid - 1, "Gaps in queue numbering: destroyed queue %u, the highest queue is %u",
+ qid, moved_qid);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+
+ WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
+ qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
+
+ node = mlx5e_htb_node_find_by_qid(htb, moved_qid);
+ WARN(!node, "Could not find a node with qid %u to move to queue %u",
+ moved_qid, qid);
+
+ /* Stop traffic to the old queue. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+ __clear_bit(moved_qid, priv->htb->qos_used_qids);
+
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, moved_qid));
+ mlx5e_deactivate_qos_sq(priv, moved_qid);
+ mlx5e_close_qos_sq(priv, moved_qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, moved_qid);
+
+ __set_bit(qid, htb->qos_used_qids);
+ WRITE_ONCE(node->qid, qid);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
+ node->classid, moved_qid, qid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ mlx5e_update_tx_netdev_queues(priv);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
+
+ *classid = node->classid;
+ return 0;
+}
+
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ u32 old_hw_id, new_hw_id;
+ int err, saved_err = 0;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
+ force ? "_FORCE" : "", classid);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id,
+ node->parent->bw_share,
+ node->parent->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ if (!force)
+ return err;
+ saved_err = err;
+ }
+
+ /* Store qid for reuse and prevent clearing the bit. */
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, qid);
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ parent = node->parent;
+ mlx5e_htb_node_delete(htb, node);
+
+ node = parent;
+ WRITE_ONCE(node->qid, qid);
+
+ /* Early return on error in force mode. Parent will still be an inner
+ * node to be deleted by a following delete operation.
+ */
+ if (saved_err)
+ return saved_err;
+
+ old_hw_id = node->hw_id;
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, old_hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ return 0;
+}
+
+static int
+mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *child;
+ int err = 0;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, child, hnode) {
+ u32 old_bw_share = child->bw_share;
+ int err_one;
+
+ if (child->parent != node)
+ continue;
+
+ mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share);
+ if (child->bw_share == old_bw_share)
+ continue;
+
+ err_one = mlx5_qos_update_node(htb->mdev, child->bw_share,
+ child->max_average_bw, child->hw_id);
+ if (!err && err_one) {
+ err = err_one;
+
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
+ qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n",
+ node->classid, err);
+ }
+ }
+
+ return err;
+}
+
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ u32 bw_share, max_average_bw;
+ struct mlx5e_qos_node *node;
+ bool ceil_changed = false;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
+ classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw);
+
+ err = mlx5_qos_update_node(htb->mdev, bw_share,
+ max_average_bw, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
+ qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ if (max_average_bw != node->max_average_bw)
+ ceil_changed = true;
+
+ node->bw_share = bw_share;
+ node->max_average_bw = max_average_bw;
+
+ if (ceil_changed)
+ err = mlx5e_htb_update_children(htb, node, extack);
+
+ return err;
+}
+
+struct mlx5e_htb *mlx5e_htb_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL);
+}
+
+void mlx5e_htb_free(struct mlx5e_htb *htb)
+{
+ kvfree(htb);
+}
+
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv)
+{
+ htb->mdev = mdev;
+ htb->netdev = netdev;
+ htb->selq = selq;
+ htb->priv = priv;
+ hash_init(htb->qos_tc2node);
+ return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->extack);
+}
+
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb)
+{
+ mlx5e_htb_root_del(htb);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
new file mode 100644
index 000000000..8386f1ea4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5E_EN_HTB_H_
+#define __MLX5E_EN_HTB_H_
+
+#include "qos.h"
+
+#define MLX5E_QOS_MAX_LEAF_NODES 256
+
+struct mlx5e_selq;
+struct mlx5e_htb;
+
+typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id);
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data);
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb);
+
+/* TX datapath API */
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid);
+
+/* HTB TC handlers */
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+struct mlx5e_htb *mlx5e_htb_alloc(void);
+void mlx5e_htb_free(struct mlx5e_htb *htb);
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv);
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb);
+#endif
+
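
As a small illustration of the enumerate helper declared above, a hypothetical callback that counts offloaded leaves; the callback follows the mlx5e_fp_htb_enumerate signature, and the function names are editorial.

static int example_count_leaf(void *data, u16 qid, u32 hw_id)
{
        (*(int *)data)++;       /* hw_id is unused in this example */
        return 0;               /* a non-zero return would abort the walk */
}

static int example_count_offloaded_leaves(struct mlx5e_htb *htb)
{
        int n = 0;

        mlx5e_htb_enumerate_leaves(htb, example_count_leaf, &n);
        return n;
}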
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 000000000..b4f3bd7d3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+ struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+ struct mlx5e_channel_stats *stats;
+ int tc;
+
+ stats = priv->channel_stats[ch];
+ data->rx_packets = stats->rq.packets;
+ data->rx_bytes = stats->rq.bytes;
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++) {
+ data->tx_packets += stats->sq[tc].packets;
+ data->tx_bytes += stats->sq[tc].bytes;
+ }
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data,
+ int buf_len)
+{
+ int ch, i = 0;
+
+ for (ch = 0; ch < priv->stats_nch; ch++) {
+ void *buf = data + i;
+
+ if (WARN_ON_ONCE(buf +
+ sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+ data + buf_len))
+ return;
+
+ mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf);
+ i += sizeof(struct mlx5e_hv_vhca_per_ring_stats);
+ }
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+ return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+ priv->stats_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work *dwork;
+ struct mlx5e_priv *priv;
+ int buf_len, rc;
+ void *buf;
+
+ dwork = to_delayed_work(work);
+ sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+ priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+ buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ agent = sagent->agent;
+ buf = sagent->buf;
+
+ memset(buf, 0, buf_len);
+ mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+ rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+ if (rc) {
+ mlx5_core_err(priv->mdev,
+ "%s: Failed to write stats, err = %d\n",
+ __func__, rc);
+ return;
+ }
+
+ if (sagent->delay)
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+ MLX5_HV_VHCA_STATS_VERSION = 1,
+ MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5e_priv *priv;
+
+ priv = mlx5_hv_vhca_agent_priv(agent);
+ sagent = &priv->stats_agent;
+
+ block->version = MLX5_HV_VHCA_STATS_VERSION;
+ block->rings = priv->stats_nch;
+
+ if (!block->command) {
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+ return;
+ }
+
+ sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+ msecs_to_jiffies(block->command * 100);
+
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
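
In summary, the control handler above maps the command field to a polling policy (editorial note):

/*
 * block->command == 0                               -> cancel the periodic work
 * block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE  -> one immediate update, no requeue
 * block->command == N (any other value)             -> update every N * 100 ms
 */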
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ struct mlx5_hv_vhca_agent *agent;
+
+ priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+ if (!priv->stats_agent.buf)
+ return;
+
+ agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+ MLX5_HV_VHCA_AGENT_STATS,
+ mlx5e_hv_vhca_stats_control, NULL,
+ mlx5e_hv_vhca_stats_cleanup,
+ priv);
+
+ if (IS_ERR_OR_NULL(agent)) {
+ if (IS_ERR(agent))
+ netdev_warn(priv->netdev,
+ "Failed to create hv vhca stats agent, err = %ld\n",
+ PTR_ERR(agent));
+
+ kvfree(priv->stats_agent.buf);
+ return;
+ }
+
+ priv->stats_agent.agent = agent;
+ INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+ if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+ return;
+
+ mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+ kvfree(priv->stats_agent.buf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 000000000..29c8c6d32
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
new file mode 100644
index 000000000..4e72ca807
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <linux/hashtable.h>
+#include <linux/refcount.h>
+
+#include "mapping.h"
+
+#define MAPPING_GRACE_PERIOD 2000
+
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
+struct mapping_ctx {
+ struct xarray xarray;
+ DECLARE_HASHTABLE(ht, 8);
+ struct mutex lock; /* Guards hashtable and xarray */
+ unsigned long max_id;
+ size_t data_size;
+ bool delayed_removal;
+ struct delayed_work dwork;
+ struct list_head pending_list;
+ spinlock_t pending_list_lock; /* Guards pending list */
+ u64 id;
+ u8 type;
+ struct list_head list;
+ refcount_t refcount;
+};
+
+struct mapping_item {
+ struct rcu_head rcu;
+ struct list_head list;
+ unsigned long timeout;
+ struct hlist_node node;
+ int cnt;
+ u32 id;
+ char data[];
+};
+
+int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id)
+{
+ struct mapping_item *mi;
+ int err = -ENOMEM;
+ u32 hash_key;
+
+ mutex_lock(&ctx->lock);
+
+ hash_key = jhash(data, ctx->data_size, 0);
+ hash_for_each_possible(ctx->ht, mi, node, hash_key) {
+ if (!memcmp(data, mi->data, ctx->data_size))
+ goto attach;
+ }
+
+ mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL);
+ if (!mi)
+ goto err_alloc;
+
+ memcpy(mi->data, data, ctx->data_size);
+ hash_add(ctx->ht, &mi->node, hash_key);
+
+ err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id),
+ GFP_KERNEL);
+ if (err)
+ goto err_assign;
+attach:
+ ++mi->cnt;
+ *id = mi->id;
+
+ mutex_unlock(&ctx->lock);
+
+ return 0;
+
+err_assign:
+ hash_del(&mi->node);
+ kfree(mi);
+err_alloc:
+ mutex_unlock(&ctx->lock);
+
+ return err;
+}
+
+static void mapping_remove_and_free(struct mapping_ctx *ctx,
+ struct mapping_item *mi)
+{
+ xa_erase(&ctx->xarray, mi->id);
+ kfree_rcu(mi, rcu);
+}
+
+static void mapping_free_item(struct mapping_ctx *ctx,
+ struct mapping_item *mi)
+{
+ if (!ctx->delayed_removal) {
+ mapping_remove_and_free(ctx, mi);
+ return;
+ }
+
+ mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD);
+
+ spin_lock(&ctx->pending_list_lock);
+ list_add_tail(&mi->list, &ctx->pending_list);
+ spin_unlock(&ctx->pending_list_lock);
+
+ schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD);
+}
+
+int mapping_remove(struct mapping_ctx *ctx, u32 id)
+{
+ unsigned long index = id;
+ struct mapping_item *mi;
+ int err = -ENOENT;
+
+ mutex_lock(&ctx->lock);
+ mi = xa_load(&ctx->xarray, index);
+ if (!mi)
+ goto out;
+ err = 0;
+
+ if (--mi->cnt > 0)
+ goto out;
+
+ hash_del(&mi->node);
+ mapping_free_item(ctx, mi);
+out:
+ mutex_unlock(&ctx->lock);
+
+ return err;
+}
+
+int mapping_find(struct mapping_ctx *ctx, u32 id, void *data)
+{
+ unsigned long index = id;
+ struct mapping_item *mi;
+ int err = -ENOENT;
+
+ rcu_read_lock();
+ mi = xa_load(&ctx->xarray, index);
+ if (!mi)
+ goto err_find;
+
+ memcpy(data, mi->data, ctx->data_size);
+ err = 0;
+
+err_find:
+ rcu_read_unlock();
+ return err;
+}
+
+static void
+mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list)
+{
+ struct mapping_item *mi;
+
+ list_for_each_entry(mi, list, list)
+ mapping_remove_and_free(ctx, mi);
+}
+
+static void mapping_work_handler(struct work_struct *work)
+{
+ unsigned long min_timeout = 0, now = jiffies;
+ struct mapping_item *mi, *next;
+ LIST_HEAD(pending_items);
+ struct mapping_ctx *ctx;
+
+ ctx = container_of(work, struct mapping_ctx, dwork.work);
+
+ spin_lock(&ctx->pending_list_lock);
+ list_for_each_entry_safe(mi, next, &ctx->pending_list, list) {
+ if (time_after(now, mi->timeout))
+ list_move(&mi->list, &pending_items);
+ else if (!min_timeout ||
+ time_before(mi->timeout, min_timeout))
+ min_timeout = mi->timeout;
+ }
+ spin_unlock(&ctx->pending_list_lock);
+
+ mapping_remove_and_free_list(ctx, &pending_items);
+
+ if (min_timeout)
+ schedule_delayed_work(&ctx->dwork, abs(min_timeout - now));
+}
+
+static void mapping_flush_work(struct mapping_ctx *ctx)
+{
+ if (!ctx->delayed_removal)
+ return;
+
+ cancel_delayed_work_sync(&ctx->dwork);
+ mapping_remove_and_free_list(ctx, &ctx->pending_list);
+}
+
+struct mapping_ctx *
+mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ ctx->max_id = max_id ? max_id : UINT_MAX;
+ ctx->data_size = data_size;
+
+ if (delayed_removal) {
+ INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler);
+ INIT_LIST_HEAD(&ctx->pending_list);
+ spin_lock_init(&ctx->pending_list_lock);
+ ctx->delayed_removal = true;
+ }
+
+ mutex_init(&ctx->lock);
+ xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
+
+ refcount_set(&ctx->refcount, 1);
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ mutex_lock(&shared_ctx_lock);
+ list_for_each_entry(ctx, &shared_ctx_list, list) {
+ if (ctx->id == id && ctx->type == type) {
+ if (refcount_inc_not_zero(&ctx->refcount))
+ goto unlock;
+ break;
+ }
+ }
+
+ ctx = mapping_create(data_size, max_id, delayed_removal);
+ if (IS_ERR(ctx))
+ goto unlock;
+
+ ctx->id = id;
+ ctx->type = type;
+ list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+ mutex_unlock(&shared_ctx_lock);
+ return ctx;
+}
+
+void mapping_destroy(struct mapping_ctx *ctx)
+{
+ if (!refcount_dec_and_test(&ctx->refcount))
+ return;
+
+ mutex_lock(&shared_ctx_lock);
+ list_del(&ctx->list);
+ mutex_unlock(&shared_ctx_lock);
+
+ mapping_flush_work(ctx);
+ xa_destroy(&ctx->xarray);
+ mutex_destroy(&ctx->lock);
+
+ kfree(ctx);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
new file mode 100644
index 000000000..4e2119f0f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#ifndef __MLX5_MAPPING_H__
+#define __MLX5_MAPPING_H__
+
+struct mapping_ctx;
+
+int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id);
+int mapping_remove(struct mapping_ctx *ctx, u32 id);
+int mapping_find(struct mapping_ctx *ctx, u32 id, void *data);
+
+/* mapping uses an xarray to map data to ids in add() and to look them up in find().
+ * For locking, it uses an internal xarray spin lock for add()/remove(), while
+ * find() uses rcu_read_lock().
+ * Choosing delayed_removal postpones the removal of a previously mapped
+ * id by MAPPING_GRACE_PERIOD milliseconds.
+ * This avoids races against hardware: a packet may still be marked in hardware
+ * with a previous id while a quick remove() and add() reuse that same id, in
+ * which case find() would return the new mapping instead of the one that was
+ * used to mark the packet.
+ */
+struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
+ bool delayed_removal);
+void mapping_destroy(struct mapping_ctx *ctx);
+
+/* Create a shared mapping context bound to the given id/type, or take a
+ * reference on an existing context with the same id and type.
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
+#endif /* __MLX5_MAPPING_H__ */
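
A minimal usage sketch of this API follows; it is editorial, the key value and the choice of delayed removal are arbitrary, and error handling beyond the context allocation is trimmed.

static void example_mapping_roundtrip(void)
{
        struct mapping_ctx *ctx;
        u32 key = 0xcafe, out = 0, id;

        ctx = mapping_create(sizeof(key), 0, true);     /* max_id 0 -> UINT_MAX */
        if (IS_ERR(ctx))
                return;

        mapping_add(ctx, &key, &id);    /* allocates (or refcounts) an id for this data */
        mapping_find(ctx, id, &out);    /* copies the stored data back; out == 0xcafe */
        mapping_remove(ctx, id);        /* with delayed removal, freed after the grace period */
        mapping_destroy(ctx);
}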
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
new file mode 100644
index 000000000..17325c5d6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies
+
+#include <linux/jhash.h>
+#include "mod_hdr.h"
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
+struct mod_hdr_key {
+ int num_actions;
+ void *actions;
+};
+
+struct mlx5e_mod_hdr_handle {
+ /* a node of a hash table which keeps all the mod_hdr entries */
+ struct hlist_node mod_hdr_hlist;
+
+ struct mod_hdr_key key;
+
+ struct mlx5_modify_hdr *modify_hdr;
+
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+};
+
+static u32 hash_mod_hdr_info(struct mod_hdr_key *key)
+{
+ return jhash(key->actions,
+ key->num_actions * MLX5_MH_ACT_SZ, 0);
+}
+
+static int cmp_mod_hdr_info(struct mod_hdr_key *a, struct mod_hdr_key *b)
+{
+ if (a->num_actions != b->num_actions)
+ return 1;
+
+ return memcmp(a->actions, b->actions,
+ a->num_actions * MLX5_MH_ACT_SZ);
+}
+
+void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl)
+{
+ mutex_init(&tbl->lock);
+ hash_init(tbl->hlist);
+}
+
+void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl)
+{
+ mutex_destroy(&tbl->lock);
+}
+
+static struct mlx5e_mod_hdr_handle *mod_hdr_get(struct mod_hdr_tbl *tbl,
+ struct mod_hdr_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_mod_hdr_handle *mh, *found = NULL;
+
+ hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, key)) {
+ refcount_inc(&mh->refcnt);
+ found = mh;
+ break;
+ }
+ }
+
+ return found;
+}
+
+struct mlx5e_mod_hdr_handle *
+mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int num_actions, actions_size, err;
+ struct mlx5e_mod_hdr_handle *mh;
+ struct mod_hdr_key key;
+ u32 hash_key;
+
+ num_actions = mod_hdr_acts->num_actions;
+ actions_size = MLX5_MH_ACT_SZ * num_actions;
+
+ key.actions = mod_hdr_acts->actions;
+ key.num_actions = num_actions;
+
+ hash_key = hash_mod_hdr_info(&key);
+
+ mutex_lock(&tbl->lock);
+ mh = mod_hdr_get(tbl, &key, hash_key);
+ if (mh) {
+ mutex_unlock(&tbl->lock);
+ wait_for_completion(&mh->res_ready);
+
+ if (mh->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto attach_header_err;
+ }
+ goto attach_header;
+ }
+
+ mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
+ if (!mh) {
+ mutex_unlock(&tbl->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mh->key.actions = (void *)mh + sizeof(*mh);
+ memcpy(mh->key.actions, key.actions, actions_size);
+ mh->key.num_actions = num_actions;
+ refcount_set(&mh->refcnt, 1);
+ init_completion(&mh->res_ready);
+
+ hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
+ mutex_unlock(&tbl->lock);
+
+ mh->modify_hdr = mlx5_modify_header_alloc(mdev, namespace,
+ mh->key.num_actions,
+ mh->key.actions);
+ if (IS_ERR(mh->modify_hdr)) {
+ err = PTR_ERR(mh->modify_hdr);
+ mh->compl_result = err;
+ goto alloc_header_err;
+ }
+ mh->compl_result = 1;
+ complete_all(&mh->res_ready);
+
+attach_header:
+ return mh;
+
+alloc_header_err:
+ complete_all(&mh->res_ready);
+attach_header_err:
+ mlx5e_mod_hdr_detach(mdev, tbl, mh);
+ return ERR_PTR(err);
+}
+
+void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ struct mlx5e_mod_hdr_handle *mh)
+{
+ if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
+ return;
+ hash_del(&mh->mod_hdr_hlist);
+ mutex_unlock(&tbl->lock);
+
+ if (mh->compl_result > 0)
+ mlx5_modify_header_dealloc(mdev, mh->modify_hdr);
+
+ kfree(mh);
+}
+
+struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh)
+{
+ return mh->modify_hdr;
+}
+
+char *
+mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int new_num_actions, max_hw_actions;
+ size_t new_sz, old_sz;
+ void *ret;
+
+ if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
+ goto out;
+
+ max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace);
+ new_num_actions = min(max_hw_actions,
+ mod_hdr_acts->actions ?
+ mod_hdr_acts->max_actions * 2 : 1);
+ if (mod_hdr_acts->max_actions == new_num_actions)
+ return ERR_PTR(-ENOSPC);
+
+ new_sz = MLX5_MH_ACT_SZ * new_num_actions;
+ old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ;
+
+ if (mod_hdr_acts->is_static) {
+ ret = kzalloc(new_sz, GFP_KERNEL);
+ if (ret) {
+ memcpy(ret, mod_hdr_acts->actions, old_sz);
+ mod_hdr_acts->is_static = false;
+ }
+ } else {
+ ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
+ if (ret)
+ memset(ret + old_sz, 0, new_sz - old_sz);
+ }
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ mod_hdr_acts->actions = ret;
+ mod_hdr_acts->max_actions = new_num_actions;
+
+out:
+ return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
+}
+
+void
+mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ if (!mod_hdr_acts->is_static)
+ kfree(mod_hdr_acts->actions);
+
+ mod_hdr_acts->actions = NULL;
+ mod_hdr_acts->num_actions = 0;
+ mod_hdr_acts->max_actions = 0;
+}
+
+char *
+mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos)
+{
+ return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
new file mode 100644
index 000000000..b8dac418d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies */
+
+#ifndef __MLX5E_EN_MOD_HDR_H__
+#define __MLX5E_EN_MOD_HDR_H__
+
+#include <linux/hashtable.h>
+#include <linux/mlx5/fs.h>
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
+struct mlx5e_mod_hdr_handle;
+
+struct mlx5e_tc_mod_hdr_acts {
+ int num_actions;
+ int max_actions;
+ bool is_static;
+ void *actions;
+};
+
+#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \
+ u8 name[len][MLX5_MH_ACT_SZ] = {}
+
+#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \
+ struct mlx5e_tc_mod_hdr_acts name = { \
+ .max_actions = ARRAY_SIZE(acts_arr), \
+ .is_static = true, \
+ .actions = acts_arr, \
+ }
+
+char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos);
+
+struct mlx5e_mod_hdr_handle *
+mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ struct mlx5e_mod_hdr_handle *mh);
+struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh);
+
+void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl);
+void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl);
+
+static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace)
+{
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
+#endif /* __MLX5E_EN_MOD_HDR_H__ */
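
An editorial sketch of how these helpers are typically combined: a small static action array is declared, one action slot is reserved with mlx5e_mod_hdr_alloc(), and the caller fills it before bumping num_actions. The function name is a placeholder and the MLX5_SET() of the action fields is omitted.

static int example_reserve_one_action(struct mlx5_core_dev *mdev)
{
        DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, 2);
        DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
        char *action;

        action = mlx5e_mod_hdr_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, &mod_acts);
        if (IS_ERR(action))
                return PTR_ERR(action);

        /* Fill 'action' (set_action_in layout) here, then publish it: */
        mod_acts.num_actions++;

        mlx5e_mod_hdr_dealloc(&mod_acts);       /* static array: only resets the bookkeeping */
        return 0;
}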
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
new file mode 100644
index 000000000..254c84739
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include "en.h"
+#include "monitor_stats.h"
+#include "lib/eq.h"
+
+/* Driver will set the following watch counters list:
+ * Ppcnt.802_3:
+ * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A
+ * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A
+ * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A
+ * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A
+ * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A
+ * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A
+ * Q_Counters:
+ * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix
+ */
+
+#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1
+#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters))
+ return false;
+ if (MLX5_CAP_PCAM_REG(mdev, ppcnt) &&
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) <
+ NUM_REQ_PPCNT_COUNTER_S1)
+ return false;
+ if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) <
+ NUM_REQ_Q_COUNTERS_S1)
+ return false;
+ return true;
+}
+
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
+
+ MLX5_SET(arm_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_ARM_MONITOR_COUNTER);
+ mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in);
+}
+
+static void mlx5e_monitor_counters_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ monitor_counters_work);
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_stats_update_ndo_stats(priv);
+ mutex_unlock(&priv->state_lock);
+ mlx5e_monitor_counter_arm(priv);
+}
+
+static int mlx5e_monitor_event_handler(struct notifier_block *nb,
+ unsigned long event, void *eqe)
+{
+ struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv,
+ monitor_counters_nb);
+ queue_work(priv->wq, &priv->monitor_counters_work);
+ return NOTIFY_OK;
+}
+
+static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
+{
+ enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
+
+ for (ppcnt_cnt = 0;
+ ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1;
+ ppcnt_cnt++, cnt++) {
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ ppcnt_cnt);
+ }
+ return ppcnt_cnt;
+}
+
+static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in)
+{
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter_group_id,
+ q_counter);
+ return 1;
+}
+
+/* Check mlx5e_monitor_counter_supported() before calling this function. */
+static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters);
+ int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
+ int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ int q_counter = priv->q_counter;
+ int cnt = 0;
+
+ if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 &&
+ max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt))
+ cnt += fill_monitor_counter_ppcnt_set1(cnt, in);
+
+ if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 &&
+ max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) &&
+ q_counter)
+ cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in);
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec_in(mdev, set_monitor_counter, in);
+}
+
+/* Check mlx5e_monitor_counter_supported() before calling this function. */
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
+{
+ INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
+ MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+ MONITOR_COUNTER);
+ mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+
+ mlx5e_set_monitor_counter(priv);
+ mlx5e_monitor_counter_arm(priv);
+ queue_work(priv->wq, &priv->update_stats_work);
+}
+
+/* Check mlx5e_monitor_counter_supported() before calling this function. */
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in);
+ mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ cancel_work_sync(&priv->monitor_counters_work);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
new file mode 100644
index 000000000..e1ac4b3d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_MONITOR_H__
+#define __MLX5_MONITOR_H__
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_MONITOR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
new file mode 100644
index 000000000..d3de1b7a8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -0,0 +1,1240 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "en/params.h"
+#include "en/txrx.h"
+#include "en/port.h"
+#include "en_accel/en_accel.h"
+#include "en_accel/ipsec.h"
+#include <net/xdp_sock_drv.h>
+
+static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
+{
+ u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size);
+
+ return min_page_shift ? : 12;
+}
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
+ u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);
+
+ /* Regular RQ uses order-0 pages, the NIC must be able to map them. */
+ if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
+ min_page_shift = req_page_shift;
+
+ return max(req_page_shift, min_page_shift);
+}
+
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ /* Different memory management schemes use different mechanisms to map
+ * user-mode memory. The stricter guarantees we have, the faster
+ * mechanisms we use:
+ * 1. MTT - direct mapping in page granularity.
+ * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but
+ * all mappings have the same size.
+ * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and
+ * mappings can have different sizes.
+ */
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ bool oversized = false;
+
+ if (xsk) {
+ oversized = xsk->chunk_size < (1 << page_shift);
+ WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift));
+ }
+
+ /* XSK frame size doesn't match the UMR page size, either because the
+ * frame size is not a power of two, or it's smaller than the minimal
+ * page size supported by the firmware.
+ * It's possible to receive packets bigger than MTU in certain setups.
+ * To avoid writing over the XSK frame boundary, the top region of each
+ * stride is mapped to a garbage page, resulting in two mappings of
+ * different sizes per frame.
+ */
+ if (oversized) {
+ /* An optimization for frame sizes equal to 3 * power_of_two.
+ * 3 KSMs point to the frame, and one KSM points to the garbage
+ * page, which works faster than KLM.
+ */
+ if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3))
+ return MLX5E_MPWRQ_UMR_MODE_TRIPLE;
+
+ return MLX5E_MPWRQ_UMR_MODE_OVERSIZED;
+ }
+
+ /* XSK frames can start at arbitrary unaligned locations, but they all
+ * have the same size which is a power of two. It allows to optimize to
+ * one KSM per frame.
+ */
+ if (unaligned)
+ return MLX5E_MPWRQ_UMR_MODE_UNALIGNED;
+
+ /* XSK: frames are naturally aligned, MTT can be used.
+ * Non-XSK: Allocations happen in units of CPU pages, therefore, the
+ * mappings are naturally aligned.
+ */
+ return MLX5E_MPWRQ_UMR_MODE_ALIGNED;
+}
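
A few editorial examples of how this selection plays out, assuming the firmware's minimum mkey entity size yields a UMR page shift of 12 (4 KiB pages):

/*
 * no XSK pool                       -> MLX5E_MPWRQ_UMR_MODE_ALIGNED   (one MTT per page)
 * XSK, aligned 4096-byte chunks     -> MLX5E_MPWRQ_UMR_MODE_ALIGNED   (MTT)
 * XSK, unaligned 4096-byte chunks   -> MLX5E_MPWRQ_UMR_MODE_UNALIGNED (one KSM per frame)
 * XSK, 3072-byte chunks (3 * 1024)  -> MLX5E_MPWRQ_UMR_MODE_TRIPLE    (3 KSMs + 1 for the garbage page)
 * XSK, 2048-byte chunks             -> MLX5E_MPWRQ_UMR_MODE_OVERSIZED (two KLMs per frame)
 */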
+
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode)
+{
+ switch (mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return sizeof(struct mlx5_mtt);
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return sizeof(struct mlx5_ksm);
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return sizeof(struct mlx5_klm) * 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return sizeof(struct mlx5_ksm) * 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode);
+ return 0;
+}
+
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u8 max_pages_per_wqe, max_log_mpwqe_size;
+ u16 max_wqe_size;
+
+ /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
+ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB;
+ max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe),
+ MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size;
+ max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift;
+
+ WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU);
+
+ return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ);
+}
+
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
+ u8 pages_per_wqe;
+
+ pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;
+
+ /* Two MTTs are needed to form an octword. The number of MTTs is encoded
+ * in octwords in a UMR WQE, so we need at least two to avoid mapping
+ * garbage addresses.
+ */
+ if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ pages_per_wqe = 2;
+
+ /* Sanity check for further calculations to succeed. */
+ BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
+ if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
+ return MLX5_MPWRQ_MAX_PAGES_PER_WQE;
+
+ return pages_per_wqe;
+}
+
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u16 umr_wqe_sz;
+
+ umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) +
+ ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
+
+ WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK);
+
+ return umr_wqe_sz;
+}
+
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode),
+ MLX5_SEND_WQE_BB);
+}
+
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+
+ /* Add another page as a buffer between WQEs. This page will absorb
+ * write overflow by the hardware, when receiving packets larger than
+ * MTU. These oversize packets are dropped by the driver at a later
+ * stage.
+ */
+ return ALIGN(pages_per_wqe + 1,
+ MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode));
+}
+
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ /* Same limits apply to KSMs and KLMs. */
+ u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS,
+ 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5E_MAX_RQ_NUM_MTTS;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return klm_limit;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ /* Each entry is two KLMs. */
+ return klm_limit / 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ /* Each entry is four KSMs. */
+ return klm_limit / 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode);
+ u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode);
+
+ return ilog2(max_entries / mtts_per_wqe);
+}
+
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) +
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ MLX5E_ORDER2_MAX_PACKET_MTU;
+}
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u16 headroom;
+
+ if (xsk)
+ return xsk->headroom;
+
+ headroom = NET_IP_ALIGN;
+ if (params->xdp_prog)
+ headroom += XDP_PACKET_HEADROOM;
+ else
+ headroom += MLX5_RX_HEADROOM;
+
+ return headroom;
+}
+
+static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+
+ return xsk->headroom + hw_mtu;
+}
+
+static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
+{
+ /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
+ u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+
+ return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
+}
+
+static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ bool mpwqe)
+{
+ /* XSK frames are mapped as individual pages, because frames may come in
+ * an arbitrary order from random locations in the UMEM.
+ */
+ if (xsk)
+ return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
+
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (params->xdp_prog)
+ return PAGE_SIZE;
+
+ return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
+}
+
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ order_base_2(linear_stride_sz);
+}
+
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
+ return false;
+
+ /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
+ * must fit into a CPU page.
+ */
+ if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
+ return false;
+
+ /* XSK frames must be big enough to hold the packet data. */
+ if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size)
+ return false;
+
+ return true;
+}
+
+static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ u8 log_stride_sz, u8 log_num_strides,
+ u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ if (log_stride_sz + log_num_strides !=
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode))
+ return false;
+
+ if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE ||
+ log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX)
+ return false;
+
+ if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX)
+ return false;
+
+ if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+ return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE;
+
+ return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+}
+
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 log_num_strides;
+ u8 log_stride_sz;
+ u8 log_wqe_sz;
+
+ if (!mlx5e_rx_is_linear_skb(mdev, params, xsk))
+ return false;
+
+ log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
+
+ if (log_wqe_sz < log_stride_sz)
+ return false;
+
+ log_num_strides = log_wqe_sz - log_stride_sz;
+
+ return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz,
+ log_num_strides, page_shift,
+ umr_mode);
+}
+
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 log_pkts_per_wqe, page_shift, max_log_rq_size;
+
+ log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk);
+ page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode);
+
+ /* The values are unsigned; avoid subtraction so it cannot underflow. */
+ if (params->log_rq_mtu_frames <
+ log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
+ return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+
+ /* Ethtool's rx_max_pending is calculated for a regular RQ, which uses
+ * pages of PAGE_SIZE. The maximum length of an XSK RQ may differ if it
+ * uses a frame size other than PAGE_SIZE.
+ * A stricter condition is checked in mlx5e_mpwrq_validate_xsk; WARN on
+ * unexpected failure.
+ */
+ if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size))
+ return max_log_rq_size;
+
+ return params->log_rq_mtu_frames - log_pkts_per_wqe;
+}
+
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE));
+}
+
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE);
+}
+
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
+ PAGE_SIZE;
+
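+ /* Log2 of how many MTU-sized packets fit into one reservation (rounded up). */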
+ return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu));
+}
+
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+
+ return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+}
+
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+}
+
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz)
+{
+#define UMR_WQE_BULK (2)
+ return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1);
+}
+
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+ return linear_headroom;
+
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return linear_headroom;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ return linear_headroom;
+
+ return 0;
+}
+
+u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
+ u16 stop_room;
+
+ stop_room = mlx5e_ktls_get_stop_room(mdev, params);
+ stop_room += mlx5e_stop_room_for_max_wqe(mdev);
+ if (is_mpwqe)
+ /* An MPWQE can take up to the maximum cacheline-aligned WQE on top
+ * of all the normal stop room, if a new packet breaks the active
+ * MPWQE session and allocates its WQEs right away.
+ */
+ stop_room += mlx5e_stop_room_for_mpwqe(mdev);
+
+ return stop_room;
+}
+
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ size_t sq_size = 1 << params->log_sq_size;
+ u16 stop_room;
+
+ stop_room = mlx5e_calc_sq_stop_room(mdev, params);
+ if (stop_room >= sq_size) {
+ mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n",
+ stop_room, sq_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+{
+ struct dim_cq_moder moder = {};
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+ struct dim_cq_moder moder = {};
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+ return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->tx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+ } else {
+ params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+ }
+}
+
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->rx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+ } else {
+ params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
+ }
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ mlx5e_reset_tx_moderation(params, cq_period_mode);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ params->tx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ mlx5e_reset_rx_moderation(params, cq_period_mode);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ params->rx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
+{
+ u32 link_speed = 0;
+ u32 pci_bw = 0;
+
+ mlx5e_port_max_linkspeed(mdev, &link_speed);
+ pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
+ mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
+ link_speed, pci_bw);
+
+#define MLX5E_SLOW_PCI_RATIO (2)
+
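+ /* The link is considered to outpace PCI when its speed exceeds twice the
+ * available PCI bandwidth.
+ */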
+ return link_speed && pci_bw &&
+ link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
+}
+
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL);
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ u16 max_mtu_pkts;
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return -EINVAL;
+
+ /* The current RQ length is too big for the given frame size: the
+ * needed number of WQEs would exceed the maximum.
+ */
+ max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned));
+ if (params->log_rq_mtu_frames > max_mtu_pkts) {
+ mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
+ 1 << params->log_rq_mtu_frames, xsk->chunk_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+ BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
+ BIT(params->log_rq_mtu_frames),
+ BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+ MLX5_WQ_TYPE_CYCLIC;
+}
+
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ /* Prefer Striding RQ, unless any of the following holds:
+ * - Striding RQ configuration is not possible/supported.
+ * - CQE compression is ON, and stride_index mini_cqe layout is not supported.
+ * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ *
+ * No XSK params: checking the availability of striding RQ in general.
+ */
+ if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ||
+ MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) &&
+ !mlx5e_mpwrq_validate_regular(mdev, params) &&
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+ !mlx5e_rx_is_linear_skb(mdev, params, NULL)))
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+}
+
+/* Build queue parameters */
+
+void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c)
+{
+ *ccp = (struct mlx5e_create_cq_param) {
+ .napi = &c->napi,
+ .ch_stats = c->stats,
+ .node = cpu_to_node(c->cpu),
+ .ix = c->ix,
+ };
+}
+
+static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
+{
+ if (xdp)
+ /* XDP requires all fragments to be of the same size. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size;
+
+ /* Optimization for small packets: the last fragment is bigger than the others. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
+}
+
+#define DEFAULT_FRAG_SIZE (2048)
+
+static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_frags_info *info)
+{
+ u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ int frag_size_max = DEFAULT_FRAG_SIZE;
+ int first_frag_size_max;
+ u32 buf_size = 0;
+ u16 headroom;
+ int max_mtu;
+ int i;
+
+ if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
+ int frag_stride;
+
+ frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false);
+
+ info->arr[0].frag_size = byte_count;
+ info->arr[0].frag_stride = frag_stride;
+ info->num_frags = 1;
+
+ /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The
+ * first WQE in the page is responsible for allocation of this
+ * page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are
+ * still not completed, the allocation must stop before k*N.
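+ * For example, with a 4K page and a 2K frag_stride, N = 2 and the mask is 1.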
+ */
+ info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1;
+
+ goto out;
+ }
+
+ headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu || params->xdp_prog) {
+ frag_size_max = PAGE_SIZE;
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu) {
+ mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n",
+ params->sw_mtu, max_mtu);
+ return -EINVAL;
+ }
+ }
+
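+ /* Spread byte_count over the fragments: the first one is capped at
+ * first_frag_size_max, the middle ones at frag_size_max, and the last one
+ * takes the remainder.
+ */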
+ i = 0;
+ while (buf_size < byte_count) {
+ int frag_size = byte_count - buf_size;
+
+ if (i == 0)
+ frag_size = min(frag_size, first_frag_size_max);
+ else if (i < MLX5E_MAX_RX_FRAGS - 1)
+ frag_size = min(frag_size, frag_size_max);
+
+ info->arr[i].frag_size = frag_size;
+ buf_size += frag_size;
+
+ if (params->xdp_prog) {
+ /* XDP multi-buffer expects fragments of the same size. */
+ info->arr[i].frag_stride = frag_size_max;
+ } else {
+ if (i == 0) {
+ /* Ensure that headroom and tailroom are included. */
+ frag_size += headroom;
+ frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
+ info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+ }
+
+ i++;
+ }
+ info->num_frags = i;
+
+ /* The last fragment of WQE with index 2*N may share the page with the
+ * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1
+ * is not completed yet, WQE 2*N must not be allocated, as it's
+ * responsible for allocating a new page.
+ */
+ if (frag_size_max == PAGE_SIZE) {
+ /* No WQE can start in the middle of a page. */
+ info->wqe_index_mask = 0;
+ } else {
+ /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments,
+ * because there would be more than MLX5E_MAX_RX_FRAGS of them.
+ */
+ WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE);
+
+ /* An odd number of fragments allows packing the last fragment of
+ * the previous WQE and the first fragment of the next WQE into the
+ * same page.
+ * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS
+ * is 4, the last fragment can be bigger than the rest only if
+ * it's the fourth one, so WQEs consisting of 3 fragments will
+ * always share a page.
+ * When a page is shared, WQE bulk size is 2, otherwise just 1.
+ */
+ info->wqe_index_mask = info->num_frags % 2;
+ }
+
+out:
+ /* Bulking optimization to skip allocation until at least 8 WQEs can be
+ * allocated in a row. At the same time, never start allocation when
+ * the page is still used by older WQEs.
+ */
+ info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+
+ info->log_num_frags = order_base_2(info->num_frags);
+
+ return 0;
+}
+
+static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
+{
+ int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
+
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ sz += sizeof(struct mlx5e_rx_wqe_ll);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ sz += sizeof(struct mlx5e_rx_wqe_cyc);
+ }
+
+ return order_base_2(sz);
+}
+
+static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
+}
+
+static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+
+ /* +1 is for the case that the packets don't consume the whole
+ * reservation, so we get a filler CQE for the rest of the reservation.
+ */
+ return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1));
+}
+
+static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param)
+{
+ bool hw_stridx = false;
+ void *cqc = param->cqc;
+ u8 log_cq_size;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
+ else
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ log_cq_size = params->log_rq_mtu_frames;
+ }
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
+ MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+ }
+
+ mlx5e_build_common_cq_param(mdev, param);
+ param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
+}
+
+static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
+ bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+ MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
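+ /* End-of-WQE padding may be skipped only when relaxed-ordering writes are
+ * supported and LRO is enabled.
+ */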
+ return ro && lro_en ?
+ MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+}
+
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int ndsegs = 1;
+ int err;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
+ u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+ log_wqe_num_of_strides,
+ page_shift, umr_mode)) {
+ mlx5_core_err(mdev,
+ "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n",
+ log_wqe_stride_size, log_wqe_num_of_strides,
+ umr_mode);
+ return -EINVAL;
+ }
+
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ MLX5_SET(wq, wq, shampo_enable, true);
+ MLX5_SET(wq, wq, log_reservation_size,
+ mlx5e_shampo_get_log_rsrv_size(mdev, params));
+ MLX5_SET(wq, wq,
+ log_max_num_of_packets_per_reservation,
+ mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ MLX5_SET(wq, wq, log_headers_entry_size,
+ mlx5e_shampo_get_log_hd_entry_size(mdev, params));
+ MLX5_SET(rqc, rqc, reservation_timeout,
+ params->packet_merge.timeout);
+ MLX5_SET(rqc, rqc, shampo_match_criteria_type,
+ params->packet_merge.shampo.match_criteria_type);
+ MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
+ params->packet_merge.shampo.alignment_granularity);
+ }
+ break;
+ }
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
+ err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ if (err)
+ return err;
+ ndsegs = param->frags_info.num_frags;
+ }
+
+ MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
+ MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params));
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+ MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
+ MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp);
+
+ return 0;
+}
+
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+ u16 q_counter,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
+ MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+}
+
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
+
+ mlx5e_build_common_cq_param(mdev, param);
+ param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
+}
+
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+}
+
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ bool allow_swp;
+
+ allow_swp = mlx5_geneve_tx_allowed(mdev) ||
+ (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO);
+ mlx5e_build_sq_param_common(mdev, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ MLX5_SET(sqc, sqc, allow_swp, allow_swp);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
+ param->stop_room = mlx5e_calc_sq_stop_room(mdev, params);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
+
+ mlx5e_build_common_cq_param(mdev, param);
+
+ param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+/* This function calculates the maximum number of header entries that are
+ * needed per WQE. The formula is based on the size of the reservations and on
+ * the restriction that the maximum number of packets per reservation is equal
+ * to the maximum number of headers per reservation.
+ */
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL));
+ int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL);
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+ u32 hd_per_wqe;
+
+ /* Assumption: hd_per_wqe % 8 == 0. */
+ hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv;
+ mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n",
+ __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv);
+ return hd_per_wqe;
+}
+
+/* This function calculates the maximum number of header entries that are
+ * needed for the WQ. This value is used to allocate the header buffer in HW,
+ * thus it must be a power of two.
+ */
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 hd_per_wqe, hd_per_wq;
+
+ hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size);
+ return hd_per_wq;
+}
+
+static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 wqebbs;
+
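+ /* The header entries of one RQ WQE are posted using full-sized KLM UMR WQEs
+ * plus one partial UMR WQE for the remainder; scale by the number of RQ WQEs.
+ */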
+ max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
+ max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
+ rest = max_hd_per_wqe % max_klm_per_umr;
+ wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
+ if (rest)
+ wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
+ wqebbs *= wq_size;
+ return wqebbs;
+}
+
+static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 umr_wqebbs;
+
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+
+ return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+}
+
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp)
+{
+ u32 wqebbs, total_pages, useful_space;
+
+ /* MLX5_WQ_TYPE_CYCLIC */
+ if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ /* UMR WQEs for the regular RQ. */
+ wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);
+
+ /* If an XDP program is attached, XSK may be turned on at any time without
+ * restarting the channel. The ICOSQ must be big enough to fit the UMR WQEs
+ * of both the regular RQ and the XSK RQ.
+ *
+ * XSK uses different values of page_shift, and the total number of UMR
+ * WQEBBs depends on it. This dependency is complex and not monotonic,
+ * especially taking into consideration that some of the parameters come
+ * from capabilities. Hence, we have to try all valid values of XSK
+ * frame size (and page_shift) to find the maximum.
+ */
+ if (params->xdp_prog) {
+ u32 max_xsk_wqebbs = 0;
+ u8 frame_shift;
+
+ for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
+ frame_shift <= PAGE_SHIFT; frame_shift++) {
+ /* The headroom doesn't affect the calculation. */
+ struct mlx5e_xsk_param xsk = {
+ .chunk_size = 1 << frame_shift,
+ .unaligned = false,
+ };
+
+ /* XSK aligned mode. */
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a power of two. */
+ xsk.unaligned = true;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is not equal to stride size. */
+ xsk.chunk_size -= 1;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a triple power of two. */
+ xsk.chunk_size = (1 << frame_shift) / 4 * 3;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+ }
+
+ wqebbs += max_xsk_wqebbs;
+ }
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);
+
+ /* UMR WQEs don't cross the page boundary; they are padded with NOPs.
+ * This padding is always smaller than the max WQE size. That gives us
+ * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes
+ * per page. The number of pages is estimated as the total size of WQEs
+ * divided by the useful space in a page, rounding up. If some WQEs don't
+ * fully fit into the useful space, they can occupy part of the padding,
+ * which keeps this estimate on the safe side (enough space is reserved).
+ */
+ useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB;
+ total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space);
+ wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB);
+
+ return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
+}
+
+static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
+{
+ if (mlx5e_is_ktls_rx(mdev))
+ return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+}
+
+static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
+ mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
+}
+
+static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+ param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
+ param->is_tls = mlx5e_is_ktls_rx(mdev);
+ if (param->is_tls)
+ param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
+}
+
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
+ param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam)
+{
+ u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
+ int err;
+
+ err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq);
+ if (err)
+ return err;
+
+ icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq);
+ async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
+
+ mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
+ mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq);
+ mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
+ mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
new file mode 100644
index 000000000..034debd14
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_PARAMS_H__
+#define __MLX5_EN_PARAMS_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param {
+ u16 headroom;
+ u16 chunk_size;
+ bool unaligned;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
+};
+
+struct mlx5e_rq_param {
+ struct mlx5e_cq_param cqp;
+ u32 rqc[MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+ struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+ struct mlx5e_cq_param cqp;
+ u32 sqc[MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+ bool is_mpw;
+ bool is_tls;
+ bool is_xdp_mb;
+ u16 stop_room;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param txq_sq;
+ struct mlx5e_sq_param xdp_sq;
+ struct mlx5e_sq_param icosq;
+ struct mlx5e_sq_param async_icosq;
+};
+
+struct mlx5e_create_sq_param {
+ struct mlx5_wq_ctrl *wq_ctrl;
+ u32 cqn;
+ u32 ts_cqe_to_dest_cqn;
+ u32 tisn;
+ u8 tis_lst_sz;
+ u8 min_inline_mode;
+};
+
+/* Striding RQ dynamic parameters */
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode);
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+
+/* Parameter calculations */
+
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+
+/* Build queue parameters */
+
+void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c);
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+ u16 q_counter,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_sq_param *param);
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam);
+
+u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
+#endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
new file mode 100644
index 000000000..89510cac4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "port.h"
+
+/* speeds in units of 1 Mbps */
+static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
+ [MLX5E_1000BASE_CX_SGMII] = 1000,
+ [MLX5E_1000BASE_KX] = 1000,
+ [MLX5E_10GBASE_CX4] = 10000,
+ [MLX5E_10GBASE_KX4] = 10000,
+ [MLX5E_10GBASE_KR] = 10000,
+ [MLX5E_20GBASE_KR2] = 20000,
+ [MLX5E_40GBASE_CR4] = 40000,
+ [MLX5E_40GBASE_KR4] = 40000,
+ [MLX5E_56GBASE_R4] = 56000,
+ [MLX5E_10GBASE_CR] = 10000,
+ [MLX5E_10GBASE_SR] = 10000,
+ [MLX5E_10GBASE_ER] = 10000,
+ [MLX5E_40GBASE_SR4] = 40000,
+ [MLX5E_40GBASE_LR4] = 40000,
+ [MLX5E_50GBASE_SR2] = 50000,
+ [MLX5E_100GBASE_CR4] = 100000,
+ [MLX5E_100GBASE_SR4] = 100000,
+ [MLX5E_100GBASE_KR4] = 100000,
+ [MLX5E_100GBASE_LR4] = 100000,
+ [MLX5E_100BASE_TX] = 100,
+ [MLX5E_1000BASE_T] = 1000,
+ [MLX5E_10GBASE_T] = 10000,
+ [MLX5E_25GBASE_CR] = 25000,
+ [MLX5E_25GBASE_KR] = 25000,
+ [MLX5E_25GBASE_SR] = 25000,
+ [MLX5E_50GBASE_CR2] = 50000,
+ [MLX5E_50GBASE_KR2] = 50000,
+};
+
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+ [MLX5E_SGMII_100M] = 100,
+ [MLX5E_1000BASE_X_SGMII] = 1000,
+ [MLX5E_5GBASE_R] = 5000,
+ [MLX5E_10GBASE_XFI_XAUI_1] = 10000,
+ [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000,
+ [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000,
+ [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
+ [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000,
+ [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
+ [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
+ [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
+ [MLX5E_400GAUI_8] = 400000,
+ [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
+ [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
+ [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
+};
+
+bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_port_eth_proto eproto;
+ int err;
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet))
+ return true;
+
+ err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto);
+ if (err)
+ return false;
+
+ return !!eproto.cap;
+}
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+ const u32 **arr, u32 *size,
+ bool force_legacy)
+{
+ bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev);
+
+ *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+ ARRAY_SIZE(mlx5e_link_speed);
+ *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5e_port_eth_proto *eproto)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ if (!eproto)
+ return -EINVAL;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+ if (err)
+ return err;
+
+ eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_capability);
+ eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+ eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+ return 0;
+}
+
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+ *an_status = 0;
+ *an_disable_cap = 0;
+ *an_disable_admin = 0;
+
+ if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1))
+ return;
+
+ *an_status = MLX5_GET(ptys_reg, out, an_status);
+ *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+ *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, bool ext)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+ u8 an_disable_admin;
+ u8 an_disable_cap;
+ u8 an_status;
+
+ mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap,
+ &an_disable_admin);
+ if (!an_disable_cap && an_disable)
+ return -EPERM;
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(ptys_reg, in, local_port, 1);
+ MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+ MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN);
+ if (ext)
+ MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin);
+ else
+ MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+ bool force_legacy)
+{
+ unsigned long temp = eth_proto_oper;
+ const u32 *table;
+ u32 speed = 0;
+ u32 max_size;
+ int i;
+
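+ /* eth_proto_oper is a bitmask of link modes; report the speed of the lowest
+ * set bit.
+ */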
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ i = find_first_bit(&temp, max_size);
+ if (i < max_size)
+ speed = table[i];
+ return speed;
+}
+
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ struct mlx5e_port_eth_proto eproto;
+ bool force_legacy = false;
+ bool ext;
+ int err;
+
+ ext = mlx5e_ptys_ext_supported(mdev);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
+ if (err)
+ goto out;
+ if (ext && !eproto.admin) {
+ force_legacy = true;
+ err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto);
+ if (err)
+ goto out;
+ }
+ *speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy);
+ if (!(*speed))
+ err = -EINVAL;
+
+out:
+ return err;
+}
+
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ struct mlx5e_port_eth_proto eproto;
+ u32 max_speed = 0;
+ const u32 *table;
+ u32 max_size;
+ bool ext;
+ int err;
+ int i;
+
+ ext = mlx5e_ptys_ext_supported(mdev);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
+ if (err)
+ return err;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
+ for (i = 0; i < max_size; ++i)
+ if (eproto.cap & MLX5E_PROT_MASK(i))
+ max_speed = max(max_speed, table[i]);
+
+ *speed = max_speed;
+ return 0;
+}
+
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+ bool force_legacy)
+{
+ u32 link_modes = 0;
+ const u32 *table;
+ u32 max_size;
+ int i;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ for (i = 0; i < max_size; ++i) {
+ if (table[i] == speed)
+ link_modes |= MLX5E_PROT_MASK(i);
+ }
+ return link_modes;
+}
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *in;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0);
+
+ kfree(in);
+ return err;
+}
+
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *out;
+ int err;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1);
+
+ kfree(out);
+ return err;
+}
+
+/* buffer[i]: the buffer that priority i is mapped to */
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
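+ /* prio_x_buff packs one 4-bit buffer index per priority. */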
+ prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff);
+ for (prio = 0; prio < 8; prio++) {
+ buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF;
+ mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]);
+ }
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* First query the pptb register */
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
+ memcpy(in, out, sz);
+ MLX5_SET(pptb_reg, in, local_port, 1);
+
+ /* Update the pm and prio_x_buff */
+ MLX5_SET(pptb_reg, in, pm, 0xFF);
+
+ prio_x_buff = 0;
+ for (prio = 0; prio < 8; prio++)
+ prio_x_buff |= (buffer[prio] << (4 * prio));
+ MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff);
+
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+enum mlx5e_fec_supported_link_mode {
+ MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_25G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_50G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_56G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_100G,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X,
+ MLX5E_MAX_FEC_SUPPORTED_LINK_MODE,
+};
+
+#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X
+
+#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \
+ do { \
+ u16 *_policy = &(policy); \
+ u32 *_buf = buf; \
+ \
+ if (write) \
+ MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \
+ else \
+ *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \
+ } while (0)
+
+/* get/set FEC admin field for a given speed */
+static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write,
+ enum mlx5e_fec_supported_link_mode link_mode)
+{
+ switch (link_mode) {
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_25G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_50G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_56G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_100G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \
+ MLX5_GET(pplm_reg, buf, fec_override_cap_##link)
+
+/* returns FEC capabilities for a given speed */
+static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap,
+ enum mlx5e_fec_supported_link_mode link_mode)
+{
+ switch (link_mode) {
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_25G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_50G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_56G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_100G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy)
+{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm))
+ return false;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return false;
+
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ u16 fec_caps;
+
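+ /* Modes based on 50G per lane require the fec_50G_per_lane_in_pplm capability. */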
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
+
+ mlx5e_get_fec_cap_field(out, &fec_caps, i);
+ if (fec_caps & fec_policy)
+ return true;
+ }
+ return false;
+}
+
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u16 *fec_configured_mode)
+{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active);
+
+ if (!fec_configured_mode)
+ goto out;
+
+ *fec_configured_mode = 0;
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
+
+ mlx5e_fec_admin_field(out, fec_configured_mode, 0, i);
+ if (*fec_configured_mode != 0)
+ goto out;
+ }
+out:
+ return 0;
+}
+
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
+{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u16 fec_policy_auto = 0;
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane)
+ return -EOPNOTSUPP;
+
+ if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ MLX5_SET(pplm_reg, out, local_port, 1);
+
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ u16 conf_fec = fec_policy;
+ u16 fec_caps = 0;
+
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
+
+ /* RS FEC in ethtool is mapped to MLX5E_FEC_RS_528_514
+ * for link modes up to 25G per lane, and to
+ * MLX5E_FEC_RS_544_514 for the newer link modes based
+ * on 50G per lane.
+ */
+ if (conf_fec == (1 << MLX5E_FEC_RS_528_514) &&
+ i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
+ conf_fec = (1 << MLX5E_FEC_RS_544_514);
+
+ mlx5e_get_fec_cap_field(out, &fec_caps, i);
+
+ /* policy supported for link speed */
+ if (fec_caps & conf_fec)
+ mlx5e_fec_admin_field(out, &conf_fec, 1, i);
+ else
+ /* set FEC to auto */
+ mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i);
+ }
+
+ return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
new file mode 100644
index 000000000..7a7defe60
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_EN_PORT_H
+#define __MLX5E_EN_PORT_H
+
+#include <linux/mlx5/driver.h>
+#include "en.h"
+
+struct mlx5e_port_eth_proto {
+ u32 cap;
+ u32 admin;
+ u32 oper;
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5e_port_eth_proto *eproto);
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin);
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, bool ext);
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+ bool force_legacy);
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+ bool force_legacy);
+bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev);
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+
+bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy);
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u16 *fec_configured_mode);
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy);
+
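+/* FEC mode bit positions, matching the FEC capability and override/admin
+ * fields of the PPLM register (see mlx5e_fec_in_caps() and
+ * mlx5e_set_fec_mode()).
+ */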
+enum {
+ MLX5E_FEC_NOFEC,
+ MLX5E_FEC_FIRECODE,
+ MLX5E_FEC_RS_528_514,
+ MLX5E_FEC_RS_544_514 = 7,
+ MLX5E_FEC_LLRS_272_257_1 = 9,
+};
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
new file mode 100644
index 000000000..c9d5d8d93
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "port_buffer.h"
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ u32 total_used = 0;
+ void *buffer;
+ void *out;
+ int err;
+ int i;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, out);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
+ port_buffer->buffer[i].lossy =
+ MLX5_GET(bufferx_reg, buffer, lossy);
+ port_buffer->buffer[i].epsb =
+ MLX5_GET(bufferx_reg, buffer, epsb);
+ port_buffer->buffer[i].size =
+ MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz;
+ port_buffer->buffer[i].xon =
+ MLX5_GET(bufferx_reg, buffer, xon_threshold) * port_buff_cell_sz;
+ port_buffer->buffer[i].xoff =
+ MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz;
+ total_used += port_buffer->buffer[i].size;
+
+ mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i,
+ port_buffer->buffer[i].size,
+ port_buffer->buffer[i].xon,
+ port_buffer->buffer[i].xoff,
+ port_buffer->buffer[i].epsb,
+ port_buffer->buffer[i].lossy);
+ }
+
+ port_buffer->port_buffer_size =
+ MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz;
+ port_buffer->spare_buffer_size =
+ port_buffer->port_buffer_size - total_used;
+
+ mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
+ port_buffer->port_buffer_size,
+ port_buffer->spare_buffer_size);
+out:
+ kfree(out);
+ return err;
+}
+
+static int port_set_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *in;
+ int err;
+ int i;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, in);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
+ u64 size = port_buffer->buffer[i].size;
+ u64 xoff = port_buffer->buffer[i].xoff;
+ u64 xon = port_buffer->buffer[i].xon;
+
+ do_div(size, port_buff_cell_sz);
+ do_div(xoff, port_buff_cell_sz);
+ do_div(xon, port_buff_cell_sz);
+ MLX5_SET(bufferx_reg, buffer, size, size);
+ MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy);
+ MLX5_SET(bufferx_reg, buffer, xoff_threshold, xoff);
+ MLX5_SET(bufferx_reg, buffer, xon_threshold, xon);
+ }
+
+ err = mlx5e_port_set_pbmc(mdev, in);
+out:
+ kfree(in);
+ return err;
+}
+
+/* xoff = (301 + 2.16 * len [m]) * speed [Gbps] + 2.72 * MTU [B]
+ * minimum speed value is 40Gbps
+ */
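+/* For illustration (values assumed, not from this patch): with the default
+ * 7 m cable (MLX5E_DEFAULT_CABLE_LEN), a 100 Gbps link and a 9216 byte MTU,
+ * the integer arithmetic in calculate_xoff() gives
+ * xoff = (301 + 216 * 7 / 100) * 100000 / 1000 + 272 * 9216 / 100
+ *      = 31600 + 25067 = 56667 bytes.
+ */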
+static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
+{
+ u32 speed;
+ u32 xoff;
+ int err;
+
+ err = mlx5e_port_linkspeed(priv->mdev, &speed);
+ if (err)
+ speed = SPEED_40000;
+ speed = max_t(u32, speed, SPEED_40000);
+
+ xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
+
+ mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff);
+ return xoff;
+}
+
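+/* For every lossless buffer, place the xoff threshold xoff bytes below the
+ * top of the buffer (headroom for data already in flight) and the xon
+ * threshold one max MTU below that; lossy buffers get both thresholds
+ * cleared.
+ */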
+static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
+ u32 xoff, unsigned int max_mtu, u16 port_buff_cell_sz)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ if (port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].xoff = 0;
+ port_buffer->buffer[i].xon = 0;
+ continue;
+ }
+
+ if (port_buffer->buffer[i].size <
+ (xoff + max_mtu + port_buff_cell_sz)) {
+ pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n",
+ i, port_buffer->buffer[i].size);
+ return -ENOMEM;
+ }
+
+ port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
+ port_buffer->buffer[i].xon =
+ port_buffer->buffer[i].xoff - max_mtu;
+ }
+
+ return 0;
+}
+
+/**
+ * update_buffer_lossy - Update buffer configuration based on pfc
+ * @max_mtu: netdev's max_mtu
+ * @pfc_en: <input> current pfc configuration
+ * @buffer: <input> current prio to buffer mapping
+ * @xoff: <input> xoff value
+ * @port_buff_cell_sz: <input> port buffer cell_size
+ * @port_buffer: <output> port receive buffer configuration
+ * @change: <output> set to true if the buffer configuration was modified
+ *
+ * Update buffer configuration based on pfc configuration and
+ * priority to buffer mapping.
+ * A buffer's lossy bit is changed to:
+ *	lossless if there is at least one PFC enabled priority
+ *	mapped to this buffer;
+ *	lossy if all priorities mapped to this buffer are PFC disabled.
+ *
+ * Return: 0 if no error, negative error code otherwise.
+ */
+static int update_buffer_lossy(unsigned int max_mtu,
+ u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz,
+ struct mlx5e_port_buffer *port_buffer,
+ bool *change)
+{
+ bool changed = false;
+ u8 lossy_count;
+ u8 prio_count;
+ u8 lossy;
+ int prio;
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ prio_count = 0;
+ lossy_count = 0;
+
+ for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) {
+ if (buffer[prio] != i)
+ continue;
+
+ prio_count++;
+ lossy_count += !(pfc_en & (1 << prio));
+ }
+
+ if (lossy_count == prio_count)
+ lossy = 1;
+ else /* lossy_count < prio_count */
+ lossy = 0;
+
+ if (lossy != port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].lossy = lossy;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+
+ *change = true;
+ }
+
+ return 0;
+}
+
+static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en)
+{
+ u32 g_rx_pause, g_tx_pause;
+ int err;
+
+ err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause);
+ if (err)
+ return err;
+
+ /* If global pause enabled, set all active buffers to lossless.
+ * Otherwise, check PFC setting.
+ */
+ if (g_rx_pause || g_tx_pause)
+ *pfc_en = 0xff;
+ else
+ err = mlx5_query_port_pfc(mdev, pfc_en, NULL);
+
+ return err;
+}
+
+#define MINIMUM_MAX_MTU 9216
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer)
+{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
+ struct mlx5e_port_buffer port_buffer;
+ u32 xoff = calculate_xoff(priv, mtu);
+ bool update_prio2buffer = false;
+ u8 buffer[MLX5E_MAX_PRIORITY];
+ bool update_buffer = false;
+ unsigned int max_mtu;
+ u32 total_used = 0;
+ u8 curr_pfc_en;
+ int err;
+ int i;
+
+ mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+ max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU);
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PFC) {
+ err = mlx5e_port_query_priority2buffer(priv->mdev, buffer);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz,
+ &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
+ update_prio2buffer = true;
+ err = fill_pfc_en(priv->mdev, &curr_pfc_en);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
+ port_buff_cell_sz, &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_SIZE) {
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
+ if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
+ mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
+ __func__, i);
+ return -EINVAL;
+ }
+
+ port_buffer.buffer[i].size = buffer_size[i];
+ total_used += buffer_size[i];
+ }
+
+ mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);
+
+ if (total_used > port_buffer.port_buffer_size)
+ return -EINVAL;
+
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+ }
+
+ /* Need to update buffer configuration if xoff value is changed */
+ if (!update_buffer && xoff != priv->dcbx.xoff) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+ }
+ priv->dcbx.xoff = xoff;
+
+ /* Apply the settings */
+ if (update_buffer) {
+ err = port_set_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+ }
+
+ if (update_prio2buffer)
+ err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
new file mode 100644
index 000000000..80af7a5ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_PORT_BUFFER_H__
+#define __MLX5_EN_PORT_BUFFER_H__
+
+#include "en.h"
+#include "port.h"
+
+#define MLX5E_MAX_BUFFER 8
+#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
+
+#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
+ MLX5_CAP_PCAM_REG(mdev, pbmc) && \
+ MLX5_CAP_PCAM_REG(mdev, pptb))
+
+enum {
+ MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0),
+ MLX5E_PORT_BUFFER_PFC = BIT(1),
+ MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2),
+ MLX5E_PORT_BUFFER_SIZE = BIT(3),
+};
+
+struct mlx5e_bufferx_reg {
+ u8 lossy;
+ u8 epsb;
+ u32 size;
+ u32 xoff;
+ u32 xon;
+};
+
+struct mlx5e_port_buffer {
+ u32 port_buffer_size;
+ u32 spare_buffer_size;
+ struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER];
+};
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer);
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
new file mode 100644
index 000000000..72b4781f0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies
+
+#include "en/ptp.h"
+#include "en/txrx.h"
+#include "en/params.h"
+#include "en/fs_tt_redirect.h"
+
+struct mlx5e_ptp_fs {
+ struct mlx5_flow_handle *l2_rule;
+ struct mlx5_flow_handle *udp_v4_rule;
+ struct mlx5_flow_handle *udp_v6_rule;
+ bool valid;
+};
+
+struct mlx5e_ptp_params {
+ struct mlx5e_params params;
+ struct mlx5e_sq_param txq_sq_param;
+ struct mlx5e_rq_param rq_param;
+};
+
+struct mlx5e_skb_cb_hwtstamp {
+ ktime_t cqe_hwtstamp;
+ ktime_t port_hwtstamp;
+};
+
+void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb)
+{
+ memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
+}
+
+static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_skb_cb_hwtstamp) > sizeof(skb->cb));
+ return (struct mlx5e_skb_cb_hwtstamp *)skb->cb;
+}
+
+static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
+ struct mlx5e_ptp_cq_stats *cq_stats)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ ktime_t diff;
+
+ diff = abs(mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp -
+ mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp);
+
+ /* Maximal allowed diff is 1 / 128 second (NSEC_PER_SEC >> 7, i.e. ~7.8 ms) */
+ if (diff > (NSEC_PER_SEC >> 7)) {
+ cq_stats->abort++;
+ cq_stats->abort_abs_diff_ns += diff;
+ return;
+ }
+
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+}
+
+void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
+ ktime_t hwtstamp,
+ struct mlx5e_ptp_cq_stats *cq_stats)
+{
+ switch (hwtstamp_type) {
+ case (MLX5E_SKB_CB_CQE_HWTSTAMP):
+ mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp = hwtstamp;
+ break;
+ case (MLX5E_SKB_CB_PORT_HWTSTAMP):
+ mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp = hwtstamp;
+ break;
+ }
+
+ /* If both CQEs have arrived, check and report the port tstamp, and clear
+ * the skb cb, as the skb is about to be released.
+ */
+ if (!mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp ||
+ !mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp)
+ return;
+
+ mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats);
+ memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
+}
+
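+/* Map a fifo/WQE counter to the index space the device reports in the
+ * wqe_counter field of the TS CQE.
+ */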
+#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
+
+static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
+}
+
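+/* A TS CQE is out-of-order when its skb_id falls outside the currently
+ * outstanding fifo window [skb_cc, skb_pc), accounting for wrap-around;
+ * such an skb was already consumed by an earlier resync.
+ */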
+static bool mlx5e_ptp_ts_cqe_ooo(struct mlx5e_ptpsq *ptpsq, u16 skb_id)
+{
+ u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ u16 skb_pc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc);
+
+ if (PTP_WQE_CTR2IDX(skb_id - skb_cc) >= PTP_WQE_CTR2IDX(skb_pc - skb_cc))
+ return true;
+
+ return false;
+}
+
+static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
+ u16 skb_id, int budget)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ struct sk_buff *skb;
+
+ ptpsq->cq_stats->resync_event++;
+
+ while (skb_cc != skb_id) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+ ptpsq->cq_stats->resync_cqe++;
+ napi_consume_skb(skb, budget);
+ skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ }
+}
+
+static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
+ struct mlx5_cqe64 *cqe,
+ int budget)
+{
+ u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
+ u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct sk_buff *skb;
+ ktime_t hwtstamp;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ ptpsq->cq_stats->err_cqe++;
+ goto out;
+ }
+
+ if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id)) {
+ if (mlx5e_ptp_ts_cqe_ooo(ptpsq, skb_id)) {
+ /* already handled by a previous resync */
+ ptpsq->cq_stats->ooo_cqe_drop++;
+ return;
+ }
+ mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget);
+ }
+
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
+ mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
+ hwtstamp, ptpsq->cq_stats);
+ ptpsq->cq_stats->cqe++;
+
+out:
+ napi_consume_skb(skb, budget);
+}
+
+static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
+{
+ struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq);
+ struct mlx5_cqwq *cqwq = &cq->wq;
+ struct mlx5_cqe64 *cqe;
+ int work_done = 0;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(cqwq);
+ if (!cqe)
+ return false;
+
+ do {
+ mlx5_cqwq_pop(cqwq);
+
+ mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget);
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
+
+ mlx5_cqwq_update_db_record(cqwq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ mlx5e_txqsq_wake(&ptpsq->txqsq);
+
+ return work_done == budget;
+}
+
+static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi);
+ struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_rq *rq = &c->rq;
+ bool busy = false;
+ int work_done = 0;
+ int i;
+
+ rcu_read_lock();
+
+ ch_stats->poll++;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (i = 0; i < c->num_tc; i++) {
+ busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget);
+ busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget);
+ }
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) {
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ busy |= work_done == budget;
+ busy |= INDIRECT_CALL_2(rq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ rq);
+ }
+
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ goto out;
+
+ ch_stats->arm++;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (i = 0; i < c->num_tc; i++) {
+ mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq);
+ mlx5e_cq_arm(&c->ptpsq[i].ts_cq);
+ }
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
+
+ return work_done;
+}
+
+static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq, int tc,
+ struct mlx5e_ptpsq *ptpsq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+ int node;
+
+ sq->pdev = c->pdev;
+ sq->clock = &mdev->clock;
+ sq->mkey_be = c->mkey_be;
+ sq->netdev = c->netdev;
+ sq->priv = c->priv;
+ sq->mdev = mdev;
+ sq->ch_ix = MLX5E_PTP_CHANNEL_IX;
+ sq->txq_ix = txq_ix;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->stats = &c->priv->ptp_stats.sq[tc];
+ sq->ptpsq = ptpsq;
+ INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+ if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
+ sq->stop_room = param->stop_room;
+ sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
+
+ node = dev_to_node(mlx5_core_dma_dev(mdev));
+
+ param->wq.db_numa_node = node;
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_txqsq_db(sq, node);
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ mlx5_core_destroy_sq(mdev, sqn);
+}
+
+static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq);
+ struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev;
+
+ ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)),
+ GFP_KERNEL, numa);
+ if (!ptpsq->skb_fifo.fifo)
+ return -ENOMEM;
+
+ ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc;
+ ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc;
+ ptpsq->skb_fifo.mask = wq_sz - 1;
+ if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter))
+ ptpsq->ts_cqe_ctr_mask =
+ (1 << MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1;
+ return 0;
+}
+
+static void mlx5e_ptp_drain_skb_fifo(struct mlx5e_skb_fifo *skb_fifo)
+{
+ while (*skb_fifo->pc != *skb_fifo->cc) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(skb_fifo);
+
+ dev_kfree_skb_any(skb);
+ }
+}
+
+static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo)
+{
+ mlx5e_ptp_drain_skb_fifo(skb_fifo);
+ kvfree(skb_fifo->fifo);
+}
+
+static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
+ int txq_ix, struct mlx5e_ptp_params *cparams,
+ int tc, struct mlx5e_ptpsq *ptpsq)
+{
+ struct mlx5e_sq_param *sqp = &cparams->txq_sq_param;
+ struct mlx5e_txqsq *txqsq = &ptpsq->txqsq;
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+ err = mlx5e_ptp_alloc_txqsq(c, txq_ix, &cparams->params, sqp,
+ txqsq, tc, ptpsq);
+ if (err)
+ return err;
+
+ csp.tisn = tisn;
+ csp.tis_lst_sz = 1;
+ csp.cqn = txqsq->cq.mcq.cqn;
+ csp.wq_ctrl = &txqsq->wq_ctrl;
+ csp.min_inline_mode = txqsq->min_inline_mode;
+ csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
+
+ err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
+ if (err)
+ goto err_free_txqsq;
+
+ err = mlx5e_ptp_alloc_traffic_db(ptpsq,
+ dev_to_node(mlx5_core_dma_dev(c->mdev)));
+ if (err)
+ goto err_free_txqsq;
+
+ return 0;
+
+err_free_txqsq:
+ mlx5e_free_txqsq(txqsq);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq)
+{
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct mlx5_core_dev *mdev = sq->mdev;
+
+ mlx5e_ptp_free_traffic_db(&ptpsq->skb_fifo);
+ cancel_work_sync(&sq->recover_work);
+ mlx5e_ptp_destroy_sq(mdev, sq->sqn);
+ mlx5e_free_txqsq_descs(sq);
+ mlx5e_free_txqsq(sq);
+}
+
+static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_params *params = &cparams->params;
+ u8 num_tc = mlx5e_get_dcb_num_tc(params);
+ int ix_base;
+ int err;
+ int tc;
+
+ ix_base = num_tc * params->num_channels;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ int txq_ix = ix_base + tc;
+
+ err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
+ cparams, tc, &c->ptpsq[tc]);
+ if (err)
+ goto close_txqsq;
+ }
+
+ return 0;
+
+close_txqsq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
+}
+
+static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_params *params = &cparams->params;
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder ptp_moder = {};
+ struct mlx5e_cq_param *cq_param;
+ u8 num_tc;
+ int err;
+ int tc;
+
+ num_tc = mlx5e_get_dcb_num_tc(params);
+
+ ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev));
+ ccp.ch_stats = c->stats;
+ ccp.napi = &c->napi;
+ ccp.ix = MLX5E_PTP_CHANNEL_IX;
+
+ cq_param = &cparams->txq_sq_param.cqp;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq;
+
+ err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+ if (err)
+ goto out_err_txqsq_cq;
+ }
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq;
+ struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc];
+
+ err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+ if (err)
+ goto out_err_ts_cq;
+
+ ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc];
+ }
+
+ return 0;
+
+out_err_ts_cq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
+ tc = num_tc;
+out_err_txqsq_cq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
+
+ return err;
+}
+
+static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder ptp_moder = {};
+ struct mlx5e_cq_param *cq_param;
+ struct mlx5e_cq *cq = &c->rq.cq;
+
+ ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev));
+ ccp.ch_stats = c->stats;
+ ccp.napi = &c->napi;
+ ccp.ix = MLX5E_PTP_CHANNEL_IX;
+
+ cq_param = &cparams->rq_param.cqp;
+
+ return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+}
+
+static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
+}
+
+static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq;
+
+ mlx5e_build_sq_param_common(mdev, param);
+
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->stop_room = mlx5e_stop_room_for_max_wqe(mdev);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ u16 q_counter,
+ struct mlx5e_ptp_params *ptp_params)
+{
+ struct mlx5e_rq_param *rq_params = &ptp_params->rq_param;
+ struct mlx5e_params *params = &ptp_params->params;
+
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ mlx5e_init_rq_type_params(mdev, params);
+ params->sw_mtu = netdev->max_mtu;
+ mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params);
+}
+
+static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams,
+ struct mlx5e_params *orig)
+{
+ struct mlx5e_params *params = &cparams->params;
+
+ params->tx_min_inline_mode = orig->tx_min_inline_mode;
+ params->num_channels = orig->num_channels;
+ params->hard_mtu = orig->hard_mtu;
+ params->sw_mtu = orig->sw_mtu;
+ params->mqprio = orig->mqprio;
+
+ /* SQ */
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ params->log_sq_size = orig->log_sq_size;
+ mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
+ }
+ /* RQ */
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ params->vlan_strip_disable = orig->vlan_strip_disable;
+ mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+ }
+}
+
+static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5e_priv *priv = c->priv;
+ int err;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = priv->netdev;
+ rq->priv = priv;
+ rq->clock = &mdev->clock;
+ rq->tstamp = &priv->tstamp;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &c->priv->ptp_stats.rq;
+ rq->ix = MLX5E_PTP_CHANNEL_IX;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ err = mlx5e_rq_set_handlers(rq, params, false);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
+}
+
+static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int node = dev_to_node(c->mdev->device);
+ int err;
+
+ err = mlx5e_init_ptp_rq(c, params, &c->rq);
+ if (err)
+ return err;
+
+ return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq);
+}
+
+static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ int err;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ err = mlx5e_ptp_open_tx_cqs(c, cparams);
+ if (err)
+ return err;
+
+ err = mlx5e_ptp_open_txqsqs(c, cparams);
+ if (err)
+ goto close_tx_cqs;
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ err = mlx5e_ptp_open_rx_cq(c, cparams);
+ if (err)
+ goto close_txqsq;
+
+ err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param);
+ if (err)
+ goto close_rx_cq;
+ }
+ return 0;
+
+close_rx_cq:
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_close_cq(&c->rq.cq);
+close_txqsq:
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+ mlx5e_ptp_close_txqsqs(c);
+close_tx_cqs:
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+ mlx5e_ptp_close_tx_cqs(c);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c)
+{
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ mlx5e_close_rq(&c->rq);
+ mlx5e_close_cq(&c->rq.cq);
+ }
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ mlx5e_ptp_close_txqsqs(c);
+ mlx5e_ptp_close_tx_cqs(c);
+ }
+}
+
+static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
+{
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS))
+ __set_bit(MLX5E_PTP_STATE_TX, c->state);
+
+ if (params->ptp_rx)
+ __set_bit(MLX5E_PTP_STATE_RX, c->state);
+
+ return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
+}
+
+static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
+
+ if (!ptp_fs->valid)
+ return;
+
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
+
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
+ ptp_fs->valid = false;
+}
+
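+/* Steer PTP event traffic (UDP destination port PTP_EV_PORT for IPv4/IPv6
+ * and L2 ETH_P_1588 frames) to the PTP channel's TIR.
+ */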
+static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
+{
+ u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
+ struct mlx5e_flow_steering *fs = priv->fs;
+ struct mlx5_flow_handle *rule;
+ struct mlx5e_ptp_fs *ptp_fs;
+ int err;
+
+ ptp_fs = mlx5e_fs_get_ptp(fs);
+ if (ptp_fs->valid)
+ return 0;
+
+ err = mlx5e_fs_tt_redirect_udp_create(fs);
+ if (err)
+ goto out_free;
+
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP,
+ tirn, PTP_EV_PORT);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_fs_udp;
+ }
+ ptp_fs->udp_v4_rule = rule;
+
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP,
+ tirn, PTP_EV_PORT);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_udp_v4_rule;
+ }
+ ptp_fs->udp_v6_rule = rule;
+
+ err = mlx5e_fs_tt_redirect_any_create(fs);
+ if (err)
+ goto out_destroy_udp_v6_rule;
+
+ rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_fs_any;
+ }
+ ptp_fs->l2_rule = rule;
+ ptp_fs->valid = true;
+
+ return 0;
+
+out_destroy_fs_any:
+ mlx5e_fs_tt_redirect_any_destroy(fs);
+out_destroy_udp_v6_rule:
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+out_destroy_udp_v4_rule:
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+out_destroy_fs_udp:
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
+out_free:
+ return err;
+}
+
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ u8 lag_port, struct mlx5e_ptp **cp)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_ptp_params *cparams;
+ struct mlx5e_ptp *c;
+ int err;
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev)));
+ cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL);
+ if (!c || !cparams) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->pdev = mlx5_core_dma_dev(priv->mdev);
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ c->num_tc = mlx5e_get_dcb_num_tc(params);
+ c->stats = &priv->ptp_stats.ch;
+ c->lag_port = lag_port;
+
+ err = mlx5e_ptp_set_state(c, params);
+ if (err)
+ goto err_free;
+
+ netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll);
+
+ mlx5e_ptp_build_params(c, cparams, params);
+
+ err = mlx5e_ptp_open_queues(c, cparams);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ priv->rx_ptp_opened = true;
+
+ *cp = c;
+
+ kvfree(cparams);
+
+ return 0;
+
+err_napi_del:
+ netif_napi_del(&c->napi);
+err_free:
+ kvfree(cparams);
+ kvfree(c);
+ return err;
+}
+
+void mlx5e_ptp_close(struct mlx5e_ptp *c)
+{
+ mlx5e_ptp_close_queues(c);
+ netif_napi_del(&c->napi);
+
+ kvfree(c);
+}
+
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ napi_enable(&c->napi);
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ mlx5e_ptp_rx_set_fs(c->priv);
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_sched(&c->napi);
+ }
+}
+
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_deactivate_rq(&c->rq);
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq);
+ }
+
+ napi_disable(&c->napi);
+}
+
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
+{
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+ return -EINVAL;
+
+ *rqn = c->rq.rqn;
+ return 0;
+}
+
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
+{
+ struct mlx5e_ptp_fs *ptp_fs;
+
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
+ return 0;
+
+ ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL);
+ if (!ptp_fs)
+ return -ENOMEM;
+ mlx5e_fs_set_ptp(fs, ptp_fs);
+
+ return 0;
+}
+
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
+{
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
+
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
+ return;
+
+ mlx5e_ptp_rx_unset_fs(fs);
+ kfree(ptp_fs);
+}
+
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
+{
+ struct mlx5e_ptp *c = priv->channels.ptp;
+
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
+ return 0;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ if (set) {
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules");
+ return -EINVAL;
+ }
+ return mlx5e_ptp_rx_set_fs(priv);
+ }
+ /* set == false */
+ if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules");
+ return -EINVAL;
+ }
+ mlx5e_ptp_rx_unset_fs(priv->fs);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
new file mode 100644
index 000000000..cc7efde88
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_PTP_H__
+#define __MLX5_EN_PTP_H__
+
+#include "en.h"
+#include "en_stats.h"
+#include "en/txrx.h"
+#include <linux/ptp_classify.h>
+
+#define MLX5E_PTP_CHANNEL_IX 0
+
+struct mlx5e_ptpsq {
+ struct mlx5e_txqsq txqsq;
+ struct mlx5e_cq ts_cq;
+ u16 skb_fifo_cc;
+ u16 skb_fifo_pc;
+ struct mlx5e_skb_fifo skb_fifo;
+ struct mlx5e_ptp_cq_stats *cq_stats;
+ u16 ts_cqe_ctr_mask;
+};
+
+enum {
+ MLX5E_PTP_STATE_TX,
+ MLX5E_PTP_STATE_RX,
+ MLX5E_PTP_STATE_NUM_STATES,
+};
+
+struct mlx5e_ptp {
+ /* data path */
+ struct mlx5e_ptpsq ptpsq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq rq;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+ u8 num_tc;
+ u8 lag_port;
+
+ /* data path - accessed per napi poll */
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES);
+};
+
+static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
+{
+ struct flow_keys fk;
+
+ if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ return false;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+ return false;
+
+ if (fk.basic.n_proto == htons(ETH_P_1588))
+ return true;
+
+ if (fk.basic.n_proto != htons(ETH_P_IP) &&
+ fk.basic.n_proto != htons(ETH_P_IPV6))
+ return false;
+
+ return (fk.basic.ip_proto == IPPROTO_UDP &&
+ fk.ports.dst == htons(PTP_EV_PORT));
+}
+
+static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq)
+{
+ if (!sq->ptpsq)
+ return true;
+
+ return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo);
+}
+
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ u8 lag_port, struct mlx5e_ptp **cp);
+void mlx5e_ptp_close(struct mlx5e_ptp *c);
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set);
+
+enum {
+ MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0),
+ MLX5E_SKB_CB_PORT_HWTSTAMP = BIT(1),
+};
+
+void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
+ ktime_t hwtstamp,
+ struct mlx5e_ptp_cq_stats *cq_stats);
+
+void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb);
+#endif /* __MLX5_EN_PTP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
new file mode 100644
index 000000000..2842195ee
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+#include <net/sch_generic.h>
+
+#include <net/pkt_cls.h>
+#include "en.h"
+#include "params.h"
+#include "../qos.h"
+#include "en/htb.h"
+
+struct qos_sq_callback_params {
+ struct mlx5e_priv *priv;
+ struct mlx5e_channels *chs;
+};
+
+int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes)
+{
+ if (nbytes < BYTES_IN_MBIT) {
+ qos_warn(mdev, "Input rate (%llu Bytes/sec) below minimum supported (%u Bytes/sec)\n",
+ nbytes, BYTES_IN_MBIT);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static u32 mlx5e_qos_bytes2mbits(struct mlx5_core_dev *mdev, u64 nbytes)
+{
+ return div_u64(nbytes, BYTES_IN_MBIT);
+}
+
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+ return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev));
+}
+
+/* TX datapath API */
+
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
+{
+ /* These channel params are safe to access from the datapath, because:
+ * 1. This function is called only after checking selq->htb_maj_id != 0,
+ * and the number of queues can't change while HTB offload is active.
+ * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for
+ * mlx5e_select_queue to finish while holding priv->state_lock,
+ * preventing other code from changing the number of queues.
+ */
+ bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS);
+
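+ /* QoS SQ txqs are laid out after all regular (and, when TX port
+ * timestamping is enabled, PTP) txqs, i.e. after
+ * (num_channels + is_ptp) * num_tc queue ids.
+ */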
+ return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid;
+}
+
+/* SQ lifecycle */
+
+static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_channel *c;
+ int ix;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ return mlx5e_state_dereference(priv, qos_sqs[qid]);
+}
+
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_create_cq_param ccp = {};
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_sq_param param_sq;
+ struct mlx5e_cq_param param_cq;
+ int txq_ix, ix, qid, err = 0;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+
+ params = &chs->params;
+
+ txq_ix = mlx5e_qid_from_qos(chs, node_qid);
+
+ WARN_ON(node_qid > priv->htb_max_qos_sqs);
+ if (node_qid == priv->htb_max_qos_sqs) {
+ struct mlx5e_sq_stats *stats, **stats_list = NULL;
+
+ if (priv->htb_max_qos_sqs == 0) {
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list),
+ GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+ }
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats) {
+ kvfree(stats_list);
+ return -ENOMEM;
+ }
+ if (stats_list)
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
+ /* Order htb_max_qos_sqs increment after writing the array pointer.
+ * Pairs with smp_load_acquire in en_stats.c.
+ */
+ smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1);
+ }
+
+ ix = node_qid % params->num_channels;
+ qid = node_qid / params->num_channels;
+ c = chs->c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = kzalloc(sizeof(*sq), GFP_KERNEL);
+
+ if (!sq)
+ return -ENOMEM;
+
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ memset(&param_sq, 0, sizeof(param_sq));
+ memset(&param_cq, 0, sizeof(param_cq));
+ mlx5e_build_sq_param(priv->mdev, params, &param_sq);
+ mlx5e_build_tx_cq_param(priv->mdev, params, &param_cq);
+ err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
+ if (err)
+ goto err_free_sq;
+ err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params,
+ &param_sq, sq, 0, hw_id,
+ priv->htb_qos_sq_stats[node_qid]);
+ if (err)
+ goto err_close_cq;
+
+ rcu_assign_pointer(qos_sqs[qid], sq);
+
+ return 0;
+
+err_close_cq:
+ mlx5e_close_cq(&sq->cq);
+err_free_sq:
+ kfree(sq);
+ return err;
+}
+
+static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id)
+{
+ struct qos_sq_callback_params *cb_params = data;
+
+ return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id);
+}
+
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_priv *priv = data;
+ struct mlx5e_txqsq *sq;
+ u16 qid;
+
+ sq = mlx5e_get_qos_sq(priv, node_qid);
+
+ qid = mlx5e_qid_from_qos(&priv->channels, node_qid);
+
+ /* If it's a new queue, it will be marked as started at this point.
+ * Stop it before updating txq2sq.
+ */
+ mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
+
+ priv->txq2sq[qid] = sq;
+
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+
+ qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid);
+ mlx5e_activate_txqsq(sq);
+
+ return 0;
+}
+
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_get_qos_sq(priv, qid);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+
+ /* Make the change to txq2sq visible before the queue is started again.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+}
+
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+ int ix;
+
+ params = &priv->channels.params;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock));
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+}
+
+void mlx5e_qos_close_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock));
+ if (!qos_sqs)
+ return;
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+ }
+
+ kvfree(qos_sqs);
+}
+
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_close_queues(chs->c[i]);
+}
+
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ u16 qos_sqs_size;
+ int i;
+
+ qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num);
+
+ for (i = 0; i < chs->num; i++) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL);
+ if (!sqs)
+ goto err_free;
+
+ WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size);
+ smp_wmb(); /* Pairs with mlx5e_napi_poll. */
+ rcu_assign_pointer(chs->c[i]->qos_sqs, sqs);
+ }
+
+ return 0;
+
+err_free:
+ while (--i >= 0) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL,
+ lockdep_is_held(&priv->state_lock));
+
+ synchronize_rcu(); /* Sync with NAPI. */
+ kvfree(sqs);
+ }
+ return -ENOMEM;
+}
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ struct qos_sq_callback_params callback_params;
+ int err;
+
+ err = mlx5e_qos_alloc_queues(priv, chs);
+ if (err)
+ return err;
+
+ callback_params.priv = priv;
+ callback_params.chs = chs;
+
+ err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params);
+ if (err) {
+ mlx5e_qos_close_all_queues(chs);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv)
+{
+ mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv);
+}
+
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_params *params = &c->priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs);
+ if (!qos_sqs)
+ return;
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ u16 qid = params->num_channels * i + c->ix;
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ /* The queue is disabled, no synchronization with datapath is needed. */
+ c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL;
+ }
+}
+
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_deactivate_queues(chs->c[i]);
+}
+
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
+{
+ qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid);
+ netdev_tx_reset_queue(txq);
+ netif_tx_start_queue(txq);
+}
+
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
+{
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid);
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+ if (!qdisc)
+ return;
+
+ spin_lock_bh(qdisc_lock(qdisc));
+ qdisc_reset(qdisc);
+ spin_unlock_bh(qdisc_lock(qdisc));
+}
+
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt)
+{
+ struct mlx5e_htb *htb = priv->htb;
+ int res;
+
+ if (!htb && htb_qopt->command != TC_HTB_CREATE)
+ return -EINVAL;
+
+ switch (htb_qopt->command) {
+ case TC_HTB_CREATE:
+ if (!mlx5_qos_is_supported(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(htb_qopt->extack,
+ "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
+ }
+ priv->htb = mlx5e_htb_alloc();
+ htb = priv->htb;
+ if (!htb)
+ return -ENOMEM;
+ res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv);
+ if (res) {
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ }
+ return res;
+ case TC_HTB_DESTROY:
+ mlx5e_htb_cleanup(htb);
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ return 0;
+ case TC_HTB_LEAF_ALLOC_QUEUE:
+ res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ case TC_HTB_LEAF_TO_INNER:
+ return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL:
+ return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL_LAST:
+ case TC_HTB_LEAF_DEL_LAST_FORCE:
+ return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid,
+ htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE,
+ htb_qopt->extack);
+ case TC_HTB_NODE_MODIFY:
+ return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil,
+ htb_qopt->extack);
+ case TC_HTB_LEAF_QUERY_QUEUE:
+ res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+struct mlx5e_mqprio_rl {
+ struct mlx5_core_dev *mdev;
+ u32 root_id;
+ u32 *leaves_id;
+ u8 num_tc;
+};
+
+struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_mqprio_rl), GFP_KERNEL);
+}
+
+void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl)
+{
+ kvfree(rl);
+}
+
+int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc,
+ u64 max_rate[])
+{
+ int err;
+ int tc;
+
+ if (!mlx5_qos_is_supported(mdev)) {
+ qos_warn(mdev, "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
+ }
+ if (num_tc > mlx5e_qos_max_leaf_nodes(mdev))
+ return -EINVAL;
+
+ rl->mdev = mdev;
+ rl->num_tc = num_tc;
+ rl->leaves_id = kvcalloc(num_tc, sizeof(*rl->leaves_id), GFP_KERNEL);
+ if (!rl->leaves_id)
+ return -ENOMEM;
+
+ err = mlx5_qos_create_root_node(mdev, &rl->root_id);
+ if (err)
+ goto err_free_leaves;
+
+ qos_dbg(mdev, "Root created, id %#x\n", rl->root_id);
+
+ for (tc = 0; tc < num_tc; tc++) {
+ u32 max_average_bw;
+
+ max_average_bw = mlx5e_qos_bytes2mbits(mdev, max_rate[tc]);
+ err = mlx5_qos_create_leaf_node(mdev, rl->root_id, 0, max_average_bw,
+ &rl->leaves_id[tc]);
+ if (err)
+ goto err_destroy_leaves;
+
+ qos_dbg(mdev, "Leaf[%d] created, id %#x, max average bw %u Mbits/sec\n",
+ tc, rl->leaves_id[tc], max_average_bw);
+ }
+ return 0;
+
+err_destroy_leaves:
+ while (--tc >= 0)
+ mlx5_qos_destroy_node(mdev, rl->leaves_id[tc]);
+ mlx5_qos_destroy_node(mdev, rl->root_id);
+err_free_leaves:
+ kvfree(rl->leaves_id);
+ return err;
+}
+
+void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
+
+ for (tc = 0; tc < rl->num_tc; tc++)
+ mlx5_qos_destroy_node(rl->mdev, rl->leaves_id[tc]);
+ mlx5_qos_destroy_node(rl->mdev, rl->root_id);
+ kvfree(rl->leaves_id);
+}
+
+int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id)
+{
+ if (tc >= rl->num_tc)
+ return -EINVAL;
+
+ *hw_id = rl->leaves_id[tc];
+ return 0;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
new file mode 100644
index 000000000..4947afa23
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_QOS_H
+#define __MLX5E_EN_QOS_H
+
+#include <linux/mlx5/driver.h>
+
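+/* Bytes per second in one Mbit per second (10^6 bits / 8) */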
+#define BYTES_IN_MBIT 125000
+
+struct mlx5e_priv;
+struct mlx5e_htb;
+struct mlx5e_channels;
+struct mlx5e_channel;
+struct tc_htb_qopt_offload;
+
+int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes);
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+
+/* SQ lifecycle */
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id);
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id);
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq);
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid);
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs);
+void mlx5e_qos_close_queues(struct mlx5e_channel *c);
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs);
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+/* TX datapath API */
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid);
+
+/* HTB API */
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb);
+
+/* MQPRIO TX rate limit */
+struct mlx5e_mqprio_rl;
+struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void);
+void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl);
+int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc,
+ u64 max_rate[]);
+void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl);
+int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000..b6f5c1bcd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+ struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct list_head metadata_list;
+};
+
+struct mlx5e_rep_bond_slave_entry {
+ struct list_head list;
+ struct net_device *netdev;
+};
+
+struct mlx5e_rep_bond_metadata {
+ struct list_head list; /* link to global list of rep_bond_metadata */
+ struct mlx5_eswitch *esw;
+ /* private of uplink holding rep bond metadata list */
+ struct net_device *lag_dev;
+ u32 metadata_reg_c_0;
+
+ struct list_head slaves_list; /* slaves list */
+ int slaves;
+};
+
+static struct mlx5e_rep_bond_metadata *
+mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_metadata *found = NULL;
+ struct mlx5e_rep_bond_metadata *cur;
+
+ list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
+ if (cur->lag_dev == lag_dev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct mlx5e_rep_bond_slave_entry *
+mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
+ const struct net_device *netdev)
+{
+ struct mlx5e_rep_bond_slave_entry *found = NULL;
+ struct mlx5e_rep_bond_slave_entry *cur;
+
+ list_for_each_entry(cur, &mdata->slaves_list, list) {
+ if (cur->netdev == netdev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
+{
+ netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ list_del(&mdata->list);
+ mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
+ WARN_ON(!list_empty(&mdata->slaves_list));
+ kfree(mdata);
+}
+
+/* This must be called under rtnl_lock */
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+ int err;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata) {
+		/* First netdev becomes a slave; no metadata represents the lag_dev yet. Create one. */
+ mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+ if (!mdata)
+ return -ENOMEM;
+
+ mdata->lag_dev = lag_dev;
+ mdata->esw = esw;
+ INIT_LIST_HEAD(&mdata->slaves_list);
+ mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
+ if (!mdata->metadata_reg_c_0) {
+ kfree(mdata);
+ return -ENOSPC;
+ }
+ list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
+
+ netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ }
+
+ s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
+ if (!s_entry) {
+ err = -ENOMEM;
+ goto entry_alloc_err;
+ }
+
+ s_entry->netdev = netdev;
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
+ mdata->metadata_reg_c_0);
+ if (err)
+ goto ingress_err;
+
+ mdata->slaves++;
+ list_add_tail(&s_entry->list, &mdata->slaves_list);
+ netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ return 0;
+
+ingress_err:
+ kfree(s_entry);
+entry_alloc_err:
+ if (!mdata->slaves)
+ mlx5e_rep_bond_metadata_release(mdata);
+ return err;
+}
+
+/* This must be called under rtnl_lock */
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata)
+ return;
+
+ s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
+ if (!s_entry)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+	/* Reset bond_metadata to zero first, then reset all ingress/egress
+	 * ACLs and rx rules of the unslaved representor's vport.
+	 */
+ mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+ mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
+ mlx5e_rep_bond_update(priv, false);
+
+ list_del(&s_entry->list);
+
+ netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ if (--mdata->slaves == 0)
+ mlx5e_rep_bond_metadata_release(mdata);
+ kfree(s_entry);
+}
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct netdev_lag_lower_state_info *lag_info;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+ struct list_head *iter;
+ struct net_device *dev;
+ u16 acl_vport_num;
+ u16 fwd_vport_num;
+ int err;
+
+ info = ptr;
+ lag_info = info->lower_state_info;
+	/* This is not an event of a representor becoming the active slave */
+ if (!lag_info->tx_enabled)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ fwd_vport_num = rpriv->rep->vport;
+ lag_dev = netdev_master_upper_dev_get(netdev);
+ if (!lag_dev)
+ return;
+
+ netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+ lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+ /* Point everyone's egress acl to the vport of the active representor */
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ acl_vport_num = rpriv->rep->vport;
+ if (acl_vport_num != fwd_vport_num) {
+			/* Only a single rx_rule for a unique bond_metadata should
+			 * be present; delete it if it is saved as the passive
+			 * vport's rx_rule with the passive vport's root_ft as
+			 * destination.
+			 */
+ mlx5e_rep_bond_update(priv, true);
+ err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+ fwd_vport_num,
+ acl_vport_num);
+ if (err)
+ netdev_warn(dev,
+ "configure slave vport(%d) egress fwd, err(%d)",
+ acl_vport_num, err);
+ }
+ }
+
+	/* Insert a new rx_rule for the unique bond_metadata and save it as the
+	 * active vport's rx_rule, with the active vport's root_ft as the new
+	 * destination.
+	 */
+ err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
+ if (err)
+ netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
+ fwd_vport_num, err);
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+
+ netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
+ info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
+
+ if (info->linking)
+ mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+ else
+ mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+}
+
+/* The bond device of representors and netdev events are used here in a
+ * specific way to support eswitch vport bonding and to perform failover of
+ * an eswitch vport by modifying the egress ACLs of the lower-dev
+ * representors. This also changes the traditional behavior of a lower dev
+ * under a bond device. Non-representor netdevs, and representors of other
+ * vendors, are not supported as lower devs of the bond device.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_bond *bond;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return NOTIFY_DONE;
+
+ bond = container_of(nb, struct mlx5e_rep_bond, nb);
+ priv = netdev_priv(netdev);
+ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
+	/* Verify the VF representor is on the same device as the bond handling the netevent. */
+ if (rpriv->uplink_priv.bond != bond)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ mlx5e_rep_changelowerstate_event(netdev, ptr);
+ break;
+ case NETDEV_CHANGEUPPER:
+ mlx5e_rep_changeupper_event(netdev, ptr);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/* If the HW supports eswitch vport bonding, register a dedicated notifier to
+ * handle the case where two or more representors are bonded.
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv;
+ int ret = 0;
+
+ priv = netdev_priv(netdev);
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+ goto out;
+
+ uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+ if (!uplink_priv->bond) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
+ uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+ ret = register_netdevice_notifier_dev_net(netdev,
+ &uplink_priv->bond->nb,
+ &uplink_priv->bond->nn);
+ if (ret) {
+ netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+ kvfree(uplink_priv->bond);
+ uplink_priv->bond = NULL;
+ }
+
+out:
+ return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+ !rpriv->uplink_priv.bond)
+ return;
+
+ unregister_netdevice_notifier_dev_net(rpriv->netdev,
+ &rpriv->uplink_priv.bond->nb,
+ &rpriv->uplink_priv.bond->nn);
+ kvfree(rpriv->uplink_priv.bond);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
new file mode 100644
index 000000000..ce85b48d3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <net/netevent.h>
+#include <net/switchdev.h>
+#include "bridge.h"
+#include "esw/bridge.h"
+#include "en_rep.h"
+
+#define MLX5_ESW_BRIDGE_UPDATE_INTERVAL 1000
+
+struct mlx5_bridge_switchdev_fdb_work {
+ struct work_struct work;
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct net_device *dev;
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ bool add;
+};
+
+static bool mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return esw == priv->mdev->priv.eswitch;
+}
+
+static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev, *esw_mdev;
+ u64 system_guid, esw_system_guid;
+
+ mdev = priv->mdev;
+ esw_mdev = esw->dev;
+
+ system_guid = mlx5_query_nic_system_image_guid(mdev);
+ esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev);
+
+ return system_guid == esw_system_guid;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw))
+ return lower;
+ }
+
+ return NULL;
+}
+
+static struct net_device *
+mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ if (netif_is_lag_master(dev))
+ dev = mlx5_esw_bridge_lag_rep_get(dev, esw);
+
+ if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw))
+ return NULL;
+
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ *vport_num = rpriv->rep->vport;
+ *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id);
+ return dev;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev))
+ return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num,
+ esw_owner_vhca_id);
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ struct net_device *rep;
+
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num,
+ esw_owner_vhca_id);
+ if (rep)
+ return rep;
+ }
+
+ return NULL;
+}
+
+static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5_esw_bridge_dev_same_esw(rep, esw))
+ return false;
+
+ priv = netdev_priv(rep);
+ mdev = priv->mdev;
+ if (netif_is_lag_master(dev))
+ return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev);
+ return true;
+}
+
+static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ netdev_nb);
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev, *rep;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ u16 vport_num, esw_owner_vhca_id;
+ struct netlink_ext_ack *extack;
+ int ifindex = upper->ifindex;
+ int err = 0;
+
+ if (!netif_is_bridge_master(upper))
+ return 0;
+
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
+ return 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+ else if (mlx5_esw_bridge_dev_same_hw(rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+
+ return err;
+}
+
+static int
+mlx5_esw_bridge_changeupper_validate_netdev(void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev))
+ return 0;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (!mlx5_lag_is_active(mdev))
+ return -EAGAIN;
+ if (!mlx5_lag_is_shared_fdb(mdev))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ int err = 0;
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ err = mlx5_esw_bridge_changeupper_validate_netdev(ptr);
+ break;
+
+ case NETDEV_CHANGEUPPER:
+ err = mlx5_esw_bridge_port_changeupper(nb, ptr);
+ break;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static int
+mlx5_esw_bridge_port_obj_add(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+ const struct switchdev_obj *obj = port_obj_info->obj;
+ const struct switchdev_obj_port_vlan *vlan;
+ u16 vport_num, esw_owner_vhca_id;
+ int err;
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid,
+ vlan->flags, br_offloads, extack);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static int
+mlx5_esw_bridge_port_obj_del(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ const struct switchdev_obj *obj = port_obj_info->obj;
+ const struct switchdev_obj_port_vlan *vlan;
+ u16 vport_num, esw_owner_vhca_id;
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
+ struct switchdev_notifier_port_attr_info *port_attr_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+ const struct switchdev_attr *attr = port_attr_info->attr;
+ u16 vport_num, esw_owner_vhca_id;
+ int err = 0;
+
+ if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_attr_info->handled = true;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+ if (attr->u.brport_flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flag is not supported");
+ err = -EINVAL;
+ }
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id,
+ attr->u.ageing_time, br_offloads);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+ err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id,
+ attr->u.vlan_filtering, br_offloads);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL:
+ err = mlx5_esw_bridge_vlan_proto_set(vport_num,
+ esw_owner_vhca_id,
+ attr->u.vlan_protocol,
+ br_offloads);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ nb_blk);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD:
+ err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads);
+ break;
+ case SWITCHDEV_PORT_OBJ_DEL:
+ err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads);
+ break;
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+ break;
+ default:
+ err = 0;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static void
+mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work)
+{
+ dev_put(fdb_work->dev);
+ kfree(fdb_work->fdb_info.addr);
+ kfree(fdb_work);
+}
+
+static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work)
+{
+ struct mlx5_bridge_switchdev_fdb_work *fdb_work =
+ container_of(work, struct mlx5_bridge_switchdev_fdb_work, work);
+ struct switchdev_notifier_fdb_info *fdb_info =
+ &fdb_work->fdb_info;
+ struct mlx5_esw_bridge_offloads *br_offloads =
+ fdb_work->br_offloads;
+ struct net_device *dev = fdb_work->dev;
+ u16 vport_num, esw_owner_vhca_id;
+
+ rtnl_lock();
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ goto out;
+
+ if (fdb_work->add)
+ mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+ else
+ mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+
+out:
+ rtnl_unlock();
+ mlx5_esw_bridge_cleanup_switchdev_fdb_work(fdb_work);
+}
+
+static struct mlx5_bridge_switchdev_fdb_work *
+mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
+ struct switchdev_notifier_fdb_info *fdb_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_bridge_switchdev_fdb_work *work;
+ u8 *addr;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&work->work, mlx5_esw_bridge_switchdev_fdb_event_work);
+ memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info));
+
+ addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+ if (!addr) {
+ kfree(work);
+ return ERR_PTR(-ENOMEM);
+ }
+ ether_addr_copy(addr, fdb_info->addr);
+ work->fdb_info.addr = addr;
+
+ dev_hold(dev);
+ work->dev = dev;
+ work->br_offloads = br_offloads;
+ work->add = add;
+ return work;
+}
+
+static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ nb);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct mlx5_bridge_switchdev_fdb_work *work;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct switchdev_notifier_info *info = ptr;
+ u16 vport_num, esw_owner_vhca_id;
+ struct net_device *upper, *rep;
+
+ if (event == SWITCHDEV_PORT_ATTR_SET) {
+ int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+
+ return notifier_from_errno(err);
+ }
+
+ upper = netdev_master_upper_dev_get_rcu(dev);
+ if (!upper)
+ return NOTIFY_DONE;
+ if (!netif_is_bridge_master(upper))
+ return NOTIFY_DONE;
+
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+ mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ /* only handle the event on peers */
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ break;
+ fallthrough;
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+
+ work = mlx5_esw_bridge_init_switchdev_fdb_work(dev,
+ event == SWITCHDEV_FDB_ADD_TO_DEVICE,
+ fdb_info,
+ br_offloads);
+ if (IS_ERR(work)) {
+ WARN_ONCE(1, "Failed to init switchdev work, err=%ld",
+ PTR_ERR(work));
+ return notifier_from_errno(PTR_ERR(work));
+ }
+
+ queue_work(br_offloads->wq, &work->work);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static void mlx5_esw_bridge_update_work(struct work_struct *work)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(work,
+ struct mlx5_esw_bridge_offloads,
+ update_work.work);
+
+ rtnl_lock();
+ mlx5_esw_bridge_update(br_offloads);
+ rtnl_unlock();
+
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
+}
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw =
+ mdev->priv.eswitch;
+ int err;
+
+ rtnl_lock();
+ br_offloads = mlx5_esw_bridge_init(esw);
+ rtnl_unlock();
+ if (IS_ERR(br_offloads)) {
+ esw_warn(mdev, "Failed to init esw bridge (err=%ld)\n", PTR_ERR(br_offloads));
+ return;
+ }
+
+ br_offloads->wq = alloc_ordered_workqueue("mlx5_bridge_wq", 0);
+ if (!br_offloads->wq) {
+ esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n");
+ goto err_alloc_wq;
+ }
+
+ br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event;
+ err = register_switchdev_notifier(&br_offloads->nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register switchdev notifier (err=%d)\n", err);
+ goto err_register_swdev;
+ }
+
+ br_offloads->nb_blk.notifier_call = mlx5_esw_bridge_event_blocking;
+ err = register_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ if (err) {
+ esw_warn(mdev, "Failed to register blocking switchdev notifier (err=%d)\n", err);
+ goto err_register_swdev_blk;
+ }
+
+ br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+ err);
+ goto err_register_netdev;
+ }
+ INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work);
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
+ return;
+
+err_register_netdev:
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+err_register_swdev_blk:
+ unregister_switchdev_notifier(&br_offloads->nb);
+err_register_swdev:
+ destroy_workqueue(br_offloads->wq);
+err_alloc_wq:
+ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
+}
+
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw =
+ mdev->priv.eswitch;
+
+ br_offloads = esw->br_offloads;
+ if (!br_offloads)
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ unregister_switchdev_notifier(&br_offloads->nb);
+ destroy_workqueue(br_offloads->wq);
+ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
new file mode 100644
index 000000000..fbeb64242
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_BRIDGE__
+#define __MLX5_EN_REP_BRIDGE__
+
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_MLX5_BRIDGE)
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv);
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_BRIDGE */
+
+static inline void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_BRIDGE */
+
+#endif /* __MLX5_EN_REP_BRIDGE__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
new file mode 100644
index 000000000..2e9bee4e5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+#include "neigh.h"
+#include "tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+#include "diag/en_rep_tracepoint.h"
+
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+ if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+ return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
+ return ~0UL;
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+ unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
+}
+
+static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ return refcount_inc_not_zero(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt)) {
+ mlx5e_rep_neigh_entry_remove(nhe);
+ kfree_rcu(nhe, rcu);
+ }
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh_hash_entry *next = NULL;
+
+ rcu_read_lock();
+
+ for (next = nhe ?
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &nhe->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list) :
+ list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list);
+ next;
+ next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &next->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list))
+ if (mlx5e_rep_neigh_entry_hold(next))
+ break;
+
+ rcu_read_unlock();
+
+ if (nhe)
+ mlx5e_rep_neigh_entry_release(nhe);
+
+ return next;
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+struct neigh_update_work {
+ struct work_struct work;
+ struct neighbour *n;
+ struct mlx5e_neigh_hash_entry *nhe;
+};
+
+static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
+{
+ neigh_release(update_work->n);
+ mlx5e_rep_neigh_entry_release(update_work->nhe);
+ kfree(update_work);
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
+ work);
+ struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+ struct neighbour *n = update_work->n;
+ struct mlx5e_encap_entry *e = NULL;
+ bool neigh_connected, same_dev;
+ unsigned char ha[ETH_ALEN];
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and it's hw address.
+	 * and its hw address.
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+
+ if (!same_dev)
+ goto out;
+
+ /* mlx5e_get_next_init_encap() releases previous encap before returning
+ * the next one.
+ */
+ while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
+ mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
+
+out:
+ rtnl_unlock();
+ mlx5e_release_neigh_update_work(update_work);
+}
+
+static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
+ struct neighbour *n)
+{
+ struct neigh_update_work *update_work;
+ struct mlx5e_neigh_hash_entry *nhe;
+ struct mlx5e_neigh m_neigh = {};
+
+ update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
+ if (WARN_ON(!update_work))
+ return NULL;
+
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+	/* Obtain a reference to the nhe as the last step, in order not to
+	 * release it in atomic context.
+	 */
+ rcu_read_lock();
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ rcu_read_unlock();
+ if (!nhe) {
+ kfree(update_work);
+ return NULL;
+ }
+
+ INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
+ neigh_hold(n);
+ update_work->n = n;
+ update_work->nhe = nhe;
+
+ return update_work;
+}
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct neigh_update_work *update_work;
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ update_work = mlx5e_alloc_neigh_update_work(priv, n);
+ if (!update_work)
+ return NOTIFY_DONE;
+
+ queue_work(priv->wq, &update_work->work);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+		/* We check that the device is present since we don't care
+		 * about changes in the default table; we only care about
+		 * per-device changes of the delay probe time parameter.
+		 */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
+ neigh_list) {
+ if (p->dev == READ_ONCE(nhe->neigh_dev)) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ goto out_err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ mutex_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&neigh_update->netevent_nb);
+ if (err)
+ goto out_notifier;
+ return 0;
+
+out_notifier:
+ neigh_update->netevent_nb.notifier_call = NULL;
+ rhashtable_destroy(&neigh_update->neigh_ht);
+out_err:
+ netdev_warn(rpriv->netdev,
+ "Failed to initialize neighbours handling for vport %d\n",
+ rpriv->rep->vport);
+ return err;
+}
+
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!rpriv->neigh_update.netevent_nb.notifier_call)
+ return;
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ mutex_destroy(&neigh_update->encap_lock);
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+
+ list_del_rcu(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under the representor's encap_lock or
+ * inside an RCU read-side critical section.
+ */
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+ return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
+}
+
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ (*nhe)->priv = priv;
+ memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
+ spin_lock_init(&(*nhe)->encap_list_lock);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+ WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
new file mode 100644
index 000000000..6fe0ab970
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_NEIGH__
+#define __MLX5_EN_REP_NEIGH__
+
+#include "en.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv);
+
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
+ struct mlx5e_neigh_hash_entry **nhe);
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline int
+mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_NEIGH__ */
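A minimal sketch of the reference-counting contract behind the neigh hash entry API above, mirroring the lookup-or-create pattern of mlx5e_rep_encap_entry_attach() in en/rep/tc.c below; the function name and surrounding context are illustrative:

#include "en/rep/neigh.h"

/* Sketch only: a successful lookup returns an entry with a reference held;
 * a freshly created entry is born with refcount 1. Either way the caller
 * owns one reference and must release it when done. Lookup must run under
 * the representor's encap_lock or an RCU read-side critical section.
 */
static int example_nhe_get_put(struct mlx5e_priv *priv,
			       struct mlx5e_neigh *m_neigh,
			       struct net_device *neigh_dev)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_hash_entry *nhe;
	int err = 0;

	mutex_lock(&rpriv->neigh_update.encap_lock);
	nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
	if (!nhe)
		err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
	if (err)
		return err;

	/* ... attach encap entries / update offloaded flows here ... */

	mlx5e_rep_neigh_entry_release(nhe);	/* removes and frees on the last put */
	return 0;
}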
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
new file mode 100644
index 000000000..fac7e3ff2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -0,0 +1,900 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <net/dst_metadata.h>
+#include <linux/netdevice.h>
+#include <linux/if_macvlan.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include "tc.h"
+#include "neigh.h"
+#include "en_rep.h"
+#include "eswitch.h"
+#include "lib/fs_chains.h"
+#include "en/tc_ct.h"
+#include "en/mapping.h"
+#include "en/tc_tun.h"
+#include "lib/port_tun.h"
+#include "en/tc/sample.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en/tc/int_port.h"
+#include "en/tc/act/act.h"
+
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+ enum flow_block_binder_type binder_type;
+
+ struct list_head list;
+};
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+ if (err)
+ return err;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
+ if (err) {
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+ mlx5_tun_entropy_refcount_dec(tun_entropy,
+ e->reformat_type);
+ return err;
+ }
+ }
+
+ e->nhe = nhe;
+ spin_lock(&nhe->encap_list_lock);
+ list_add_rcu(&e->encap_list, &nhe->encap_list);
+ spin_unlock(&nhe->encap_list_lock);
+
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+
+ if (!e->nhe)
+ return;
+
+ spin_lock(&e->nhe->encap_list_lock);
+ list_del_rcu(&e->encap_list);
+ spin_unlock(&e->nhe->encap_list_lock);
+
+ mlx5e_rep_neigh_entry_release(e->nhe);
+ e->nhe = NULL;
+ mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
+}
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool encap_connected;
+ LIST_HEAD(flow_list);
+
+ ASSERT_RTNL();
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
+ goto unlock;
+
+ mlx5e_take_all_encap_flows(e, &flow_list);
+
+ if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+ (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
+ mlx5e_tc_encap_flows_del(priv, e, &flow_list);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ struct net_device *route_dev;
+
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+		/* Update the encap source mac, in case we deleted the flows
+		 * when the encap source mac changed.
+		 */
+ route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
+ if (route_dev)
+ ether_addr_copy(eth->h_source, route_dev->dev_addr);
+
+ mlx5e_tc_encap_flows_add(priv, e, &flow_list);
+ }
+unlock:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ mlx5e_put_flow_list(priv, &flow_list);
+}
+
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower, int flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ switch (ma->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlx5e_tc_configure_matchall(priv, ma);
+ case TC_CLSMATCHALL_DESTROY:
+ return mlx5e_tc_delete_matchall(priv, ma);
+ case TC_CLSMATCHALL_STATS:
+ mlx5e_tc_stats_matchall(priv, ma);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!priv->netdev || !netif_device_present(priv->netdev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+ case TC_SETUP_CLSMATCHALL:
+ return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload tmp, *f = type_data;
+ struct mlx5e_priv *priv = cb_priv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ flags = MLX5_TC_FLAG(INGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+ esw = priv->mdev->priv.eswitch;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ if (!mlx5_chains_prios_supported(esw_chains(esw)))
+ return -EOPNOTSUPP;
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+		 * it to range [1, mlx5_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
+ return -EOPNOTSUPP;
+ if (tmp.common.chain_index != 0)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
+ tmp.common.prio++;
+ err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
+static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct flow_block_offload *f = type_data;
+
+ f->unlocked_driver_cb = true;
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_tc_cb_list,
+ mlx5e_rep_setup_tc_cb,
+ priv, priv, true);
+ case TC_SETUP_FT:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_ft_cb_list,
+ mlx5e_rep_setup_ft_cb,
+ priv, priv, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ int err;
+
+ mutex_init(&uplink_priv->unready_flows_lock);
+ INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(uplink_priv);
+ return err;
+}
+
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
+ mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+}
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+ mlx5e_tc_reoffload_flows_work);
+}
+
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+}
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+
+ return NOTIFY_OK;
+}
+
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev,
+ enum flow_block_binder_type binder_type)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev &&
+ cb_priv->binder_type == binder_type)
+ return cb_priv;
+
+ return NULL;
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct flow_cls_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv,
+ unsigned long flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int err = 0;
+
+ if (!netif_device_present(indr_priv->rpriv->netdev))
+ return -EOPNOTSUPP;
+
+ switch (flower->command) {
+ case FLOW_CLS_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ?
+ MLX5_TC_FLAG(EGRESS) :
+ MLX5_TC_FLAG(INGRESS);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+ struct flow_cls_offload *f = type_data;
+ struct flow_cls_offload tmp;
+ struct mlx5e_priv *mpriv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ mpriv = netdev_priv(priv->rpriv->netdev);
+ esw = mpriv->mdev->priv.eswitch;
+
+ flags = MLX5_TC_FLAG(EGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+		 * it to range [1, mlx5_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
+ tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
+ tmp.common.chain_index)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
+ tmp.common.prio++;
+ err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void mlx5e_rep_indr_block_unbind(void *cb_priv)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->mode == MACVLAN_MODE_PASSTHRU;
+}
+
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
+ struct mlx5e_rep_priv *rpriv,
+ struct flow_block_offload *f,
+ flow_setup_cb_t *setup_cb,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool is_ovs_int_port = netif_is_ovs_master(netdev);
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+ !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
+ !is_ovs_int_port) {
+ if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
+ return -EOPNOTSUPP;
+ if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
+ netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+ f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ return -EOPNOTSUPP;
+
+ if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
+ return -EOPNOTSUPP;
+
+ if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
+ return -EOPNOTSUPP;
+
+ f->unlocked_driver_cb = true;
+ f->driver_block_list = &mlx5e_block_cb_list;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ indr_priv->binder_type = f->binder_type;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
+ mlx5e_rep_indr_block_unbind,
+ f, netdev, sch, data, rpriv,
+ cleanup);
+ if (IS_ERR(block_cb)) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ return PTR_ERR(block_cb);
+ }
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
+ if (!indr_priv)
+ return -ENOENT;
+
+ block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
+ if (!block_cb)
+ return -ENOENT;
+
+ flow_indr_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct flow_action_entry *action;
+ struct mlx5e_tc_act *act;
+ bool add = false;
+ int i;
+
+	/* There is currently no use case for more than one action (e.g. pedit).
+	 * When there is, cleanup of multiple actions on error will need to be
+	 * handled.
+	 */
+ if (!flow_offload_has_one_action(&fl_act->action))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ flow_action_for_each(i, action, &fl_act->action) {
+ act = mlx5e_tc_act_get(action->id, ns_type);
+ if (!act)
+ continue;
+
+ if (!act->offload_action)
+ continue;
+
+ if (!act->offload_action(priv, fl_act, action))
+ add = true;
+ }
+
+ return add ? 0 : -EOPNOTSUPP;
+}
+
+static int
+mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->destroy_action)
+ return -EOPNOTSUPP;
+
+ return act->destroy_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->stats_action)
+ return -EOPNOTSUPP;
+
+ return act->stats_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ switch (fl_act->command) {
+ case FLOW_ACT_REPLACE:
+ return mlx5e_rep_indr_replace_act(rpriv, fl_act);
+ case FLOW_ACT_DESTROY:
+ return mlx5e_rep_indr_destroy_act(rpriv, fl_act);
+ case FLOW_ACT_STATS:
+ return mlx5e_rep_indr_stats_act(rpriv, fl_act);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv,
+ enum tc_setup_type type,
+ void *data)
+{
+ if (!data)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_ACT:
+ return mlx5e_rep_indr_setup_act(rpriv, data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
+			enum tc_setup_type type, void *type_data, void *data,
+			void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ if (!netdev)
+ return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data);
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
+ mlx5e_rep_indr_setup_tc_cb,
+ data, cleanup);
+ case TC_SETUP_FT:
+ return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
+ mlx5e_rep_indr_setup_ft_cb,
+ data, cleanup);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+
+ return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
+}
+
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
+{
+ flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
+ mlx5e_rep_indr_block_unbind);
+}
+
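+/* Rebuild the tunnel metadata that was mapped into tunnel_id on the RX path:
+ * look up the tunnel match key and encap options, attach a metadata dst to
+ * the skb and redirect skb->dev to the tunnel device.
+ */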
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv,
+ u32 tunnel_id)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct tunnel_match_enc_opts enc_opts = {};
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct metadata_dst *tun_dst;
+ struct tunnel_match_key key;
+ u32 tun_id, enc_opts_id;
+ struct net_device *dev;
+ int err;
+
+ enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+ tun_id = tunnel_id >> ENC_OPTS_BITS;
+
+ if (!tun_id)
+ return true;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+ if (err) {
+ WARN_ON_ONCE(true);
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel for tun_id: %d, err: %d\n",
+ tun_id, err);
+ return false;
+ }
+
+ if (enc_opts_id) {
+ err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id, &enc_opts);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
+ enc_opts_id, err);
+ return false;
+ }
+ }
+
+ if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ key.enc_tp.dst, TUNNEL_KEY,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ enc_opts.key.len);
+ } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ key.enc_tp.dst, 0, TUNNEL_KEY,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ enc_opts.key.len);
+ } else {
+ netdev_dbg(priv->netdev,
+ "Couldn't restore tunnel, unsupported addr_type: %d\n",
+ key.enc_control.addr_type);
+ return false;
+ }
+
+ if (!tun_dst) {
+ netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
+ return false;
+ }
+
+ tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
+
+ if (enc_opts.key.len)
+ ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
+ enc_opts.key.data,
+ enc_opts.key.len,
+ enc_opts.key.dst_opt_type);
+
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+ dev = dev_get_by_index(&init_net, key.filter_ifindex);
+ if (!dev) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel device with ifindex: %d\n",
+ key.filter_ifindex);
+ return false;
+ }
+
+ /* Set fwd_dev so we do dev_put() after datapath */
+ tc_priv->fwd_dev = dev;
+
+ skb->dev = dev;
+
+ return true;
+}
+
+static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
+ u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (chain) {
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct tc_skb_ext *tc_skb_ext;
+ struct mlx5_eswitch *esw;
+ u32 zone_restore_id;
+
+ tc_skb_ext = tc_skb_ext_alloc(skb);
+ if (!tc_skb_ext) {
+ WARN_ON(1);
+ return false;
+ }
+ tc_skb_ext->chain = chain;
+ zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
+ esw = priv->mdev->priv.eswitch;
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
+ zone_restore_id))
+ return false;
+ }
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+}
+
+static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (tc_priv->fwd_dev)
+ dev_put(tc_priv->fwd_dev);
+}
+
+static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
+ netdev_dbg(priv->netdev,
+ "Failed to restore tunnel info for sampled packet\n");
+ return;
+ }
+ mlx5e_tc_sample_skb(skb, mapped_obj);
+ mlx5_rep_tc_post_napi_receive(tc_priv);
+}
+
+static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv,
+ bool *forward_tx,
+ u32 reg_c1)
+{
+ u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ /* Tunnel restore takes precedence over int port restore */
+ if (tunnel_id)
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
+ mapped_obj->int_port_metadata, forward_tx)) {
+ /* Set fwd_dev for future dev_put */
+ tc_priv->fwd_dev = skb->dev;
+
+ return true;
+ }
+
+ return false;
+}
+
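+/* Restore the TC state encoded in the CQE metadata (reg_c0/reg_c1) before
+ * handing the skb to the stack: chain restore, sampled packets, internal
+ * port forwarding and tunnel info.
+ */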
+void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb)
+{
+ u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_tc_update_priv tc_priv = {};
+ struct mlx5_mapped_obj mapped_obj;
+ struct mlx5_eswitch *esw;
+ bool forward_tx = false;
+ struct mlx5e_priv *priv;
+ u32 reg_c0;
+ int err;
+
+ reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
+ if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
+ goto forward;
+
+ /* If reg_c0 is not equal to the default flow tag then skb->mark
+ * is not supported and must be reset back to 0.
+ */
+ skb->mark = 0;
+
+ priv = netdev_priv(skb->dev);
+ esw = priv->mdev->priv.eswitch;
+ err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find mapped object for reg_c0: %d, err: %d\n",
+ reg_c0, err);
+ goto free_skb;
+ }
+
+ if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+ if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
+ !mlx5_ipsec_is_rx_flow(cqe))
+ goto free_skb;
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
+ mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
+ goto free_skb;
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
+ if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
+ &forward_tx, reg_c1))
+ goto free_skb;
+ } else {
+ netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
+ goto free_skb;
+ }
+
+forward:
+ if (forward_tx)
+ dev_queue_xmit(skb);
+ else
+ napi_gro_receive(rq->cq.napi, skb);
+
+ mlx5_rep_tc_post_napi_receive(&tc_priv);
+
+ return;
+
+free_skb:
+ dev_kfree_skb_any(skb);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
new file mode 100644
index 000000000..7c9dd3a75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_TC_H__
+#define __MLX5_EN_REP_TC_H__
+
+#include <linux/skbuff.h>
+#include "en_tc.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv);
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv);
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv);
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv);
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv);
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN]);
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
+
+void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5e_rep_priv;
+static inline int
+mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+static inline int
+mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {}
+
+static inline void
+mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {}
+static inline void
+mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {}
+
+static inline int
+mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; }
+
+static inline int
+mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data) { return -EOPNOTSUPP; }
+
+static inline void
+mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); }
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
new file mode 100644
index 000000000..9b1f1369a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -0,0 +1,759 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "params.h"
+#include "txrx.h"
+#include "devlink.h"
+#include "ptp.h"
+#include "lib/tout.h"
+
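+/* Read the current hardware state of an RQ via the QUERY_RQ command. */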
+static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ void *out;
+ void *rqc;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rq(dev, rqn, out);
+ if (err)
+ goto out;
+
+ rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+ *state = MLX5_GET(rqc, rqc, state);
+
+out:
+ kvfree(out);
+ return err;
+}
+
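+/* Poll until the ICOSQ consumer counter catches up with the producer counter
+ * or the FLUSH_ON_ERROR timeout expires.
+ */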
+static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
+{
+ struct mlx5_core_dev *dev = icosq->channel->mdev;
+ unsigned long exp_time;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
+
+ while (time_before(jiffies, exp_time)) {
+ if (icosq->cc == icosq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(icosq->channel->netdev,
+ "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n",
+ icosq->sqn, icosq->cc, icosq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
+{
+ WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ icosq->sqn, icosq->cc, icosq->pc);
+ icosq->cc = 0;
+ icosq->pc = 0;
+}
+
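+/* Recover an ICOSQ that entered the error state: quiesce the RQs served by
+ * it, drain and reset the ICOSQ, then reactivate the ICOSQ and the RQs.
+ */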
+static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+{
+ struct mlx5e_rq *xskrq = NULL;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_icosq *icosq;
+ struct net_device *dev;
+ struct mlx5e_rq *rq;
+ u8 state;
+ int err;
+
+ icosq = ctx;
+
+ mutex_lock(&icosq->channel->icosq_recovery_lock);
+
+ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
+ rq = &icosq->channel->rq;
+ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
+ xskrq = &icosq->channel->xskrq;
+ mdev = icosq->channel->mdev;
+ dev = icosq->channel->netdev;
+ err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n",
+ icosq->sqn, err);
+ goto out;
+ }
+
+ if (state != MLX5_SQC_STATE_ERR)
+ goto out;
+
+ mlx5e_deactivate_rq(rq);
+ if (xskrq)
+ mlx5e_deactivate_rq(xskrq);
+
+ err = mlx5e_wait_for_icosq_flush(icosq);
+ if (err)
+ goto out;
+
+ mlx5e_deactivate_icosq(icosq);
+
+ /* At this point, both the rq and the icosq are disabled */
+
+ err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn);
+ if (err)
+ goto out;
+
+ mlx5e_reset_icosq_cc_pc(icosq);
+
+ mlx5e_free_rx_in_progress_descs(rq);
+ if (xskrq)
+ mlx5e_free_rx_in_progress_descs(xskrq);
+
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mlx5e_activate_icosq(icosq);
+
+ mlx5e_activate_rq(rq);
+ rq->stats->recover++;
+
+ if (xskrq) {
+ mlx5e_activate_rq(xskrq);
+ xskrq->stats->recover++;
+ }
+
+ mlx5e_trigger_napi_icosq(icosq->channel);
+
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+
+ return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+ return err;
+}
+
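+/* Recover an RQ that hit an error CQE: deactivate it, drive it back to the
+ * ready state via mlx5e_flush_rq() and reactivate it.
+ */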
+static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
+{
+ struct mlx5e_rq *rq = ctx;
+ int err;
+
+ mlx5e_deactivate_rq(rq);
+ err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
+ clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+ if (err)
+ return err;
+
+ mlx5e_activate_rq(rq);
+ rq->stats->recover++;
+ if (rq->channel)
+ mlx5e_trigger_napi_icosq(rq->channel);
+ else
+ mlx5e_trigger_napi_sched(rq->cq.napi);
+ return 0;
+}
+
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+{
+ struct mlx5_eq_comp *eq;
+ struct mlx5e_rq *rq;
+ int err;
+
+ rq = ctx;
+ eq = rq->cq.mcq.eq;
+
+ err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
+ if (err && rq->icosq)
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);
+
+ return err;
+}
+
+static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
+{
+ return err_ctx->recover(err_ctx->ctx);
+}
+
+static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
+ void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) :
+ mlx5e_health_recover_channels(priv);
+}
+
+static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "WQE size",
+ mlx5_wq_cyc_get_size(&icosq->wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ u16 wqe_counter;
+ int wqes_sz;
+ u8 hw_state;
+ u16 wq_head;
+ int err;
+
+ err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state);
+ if (err)
+ return err;
+
+ wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
+ wq_head = mlx5e_rqwq_get_head(rq);
+ wqe_counter = mlx5e_rqwq_get_wqe_counter(rq);
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head);
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg);
+ if (err)
+ return err;
+
+ if (rq->icosq) {
+ struct mlx5e_icosq *icosq = rq->icosq;
+ u8 icosq_hw_state;
+
+ err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state);
+ if (err)
+ return err;
+
+ err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix);
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_params *params;
+ u32 rq_stride, rq_sz;
+ bool real_time;
+ int err;
+
+ params = &priv->channels.params;
+ rq_sz = mlx5e_rqwq_get_size(rq);
+ real_time = mlx5_is_real_time_rq(priv->mdev);
+ rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter);
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg);
+ if (err)
+ return err;
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg);
+ if (err)
+ return err;
+ }
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ int i, err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg);
+ if (err)
+ goto unlock;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
+ if (err)
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5e_rq *rq;
+
+ rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ?
+ &c->xskrq : &c->rq;
+
+ err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
+ if (err)
+ goto unlock;
+ }
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg);
+ if (err)
+ goto unlock;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_txqsq *icosq = ctx;
+ struct mlx5_rsc_key key = {};
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = icosq->sqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5_rsc_key key = {};
+ struct mlx5e_rq *rq = ctx;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = rq->rqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_RCV_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5_rsc_key key = {};
+ int i, err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
+ if (err)
+ return err;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
+
+ err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ");
+ if (err)
+ return err;
+ }
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ");
+ if (err)
+ return err;
+ }
+
+ return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
+ struct mlx5e_err_ctx *err_ctx,
+ struct devlink_fmsg *fmsg)
+{
+ return err_ctx->dump(priv, fmsg, err_ctx->ctx);
+}
+
+static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
+ mlx5e_rx_reporter_dump_all_rqs(priv, fmsg);
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+ char icosq_str[32] = {};
+
+ err_ctx.ctx = rq;
+ err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_rq;
+
+ if (icosq)
+ snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
+ snprintf(err_str, sizeof(err_str),
+ "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
+ rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = rq;
+ err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_rq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
+{
+ struct mlx5e_priv *priv = icosq->channel->priv;
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = icosq;
+ err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_icosq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
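+/* Take/release the per-channel ICOSQ recovery lock, serializing the caller
+ * against mlx5e_rx_reporter_err_icosq_cqe_recover().
+ */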
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
+{
+ mutex_lock(&c->icosq_recovery_lock);
+}
+
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
+{
+ mutex_unlock(&c->icosq_recovery_lock);
+}
+
+static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
+ .name = "rx",
+ .recover = mlx5e_rx_reporter_recover,
+ .diagnose = mlx5e_rx_reporter_diagnose,
+ .dump = mlx5e_rx_reporter_dump,
+};
+
+#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
+
+void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops,
+ MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
+ if (IS_ERR(reporter)) {
+ netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
+ PTR_ERR(reporter));
+ return;
+ }
+ priv->rx_reporter = reporter;
+}
+
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
+{
+ if (!priv->rx_reporter)
+ return;
+
+ devlink_port_health_reporter_destroy(priv->rx_reporter);
+ priv->rx_reporter = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
new file mode 100644
index 000000000..60bc5b577
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -0,0 +1,614 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "health.h"
+#include "en/ptp.h"
+#include "en/devlink.h"
+#include "lib/tout.h"
+
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_core_dev *dev = sq->mdev;
+ unsigned long exp_time;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
+
+ while (time_before(jiffies, exp_time)) {
+ if (sq->cc == sq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(sq->netdev,
+ "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+ WARN_ONCE(sq->cc != sq->pc,
+ "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+ sq->cc = 0;
+ sq->dma_fifo_cc = 0;
+ sq->pc = 0;
+}
+
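+/* Recover an SQ that entered the error state: stop its txq, wait for the SQ
+ * to drain, bring it back to the ready state and reactivate it.
+ */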
+static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
+{
+ struct mlx5_core_dev *mdev;
+ struct net_device *dev;
+ struct mlx5e_txqsq *sq;
+ u8 state;
+ int err;
+
+ sq = ctx;
+ mdev = sq->mdev;
+ dev = sq->netdev;
+
+ if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+ return 0;
+
+ err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+ sq->sqn, err);
+ goto out;
+ }
+
+ if (state != MLX5_SQC_STATE_ERR)
+ goto out;
+
+ mlx5e_tx_disable_queue(sq->txq);
+
+ err = mlx5e_wait_for_sq_flush(sq);
+ if (err)
+ goto out;
+
+ /* At this point, no new packets will arrive from the stack as TXQ is
+ * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+	 * pending WQEs. The SQ can now be safely reset.
+ */
+
+ err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
+ if (err)
+ goto out;
+
+ mlx5e_reset_txqsq_cc_pc(sq);
+ sq->stats->recover++;
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ mlx5e_activate_txqsq(sq);
+
+ return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ return err;
+}
+
+struct mlx5e_tx_timeout_ctx {
+ struct mlx5e_txqsq *sq;
+ signed int status;
+};
+
+static int mlx5e_tx_reporter_timeout_recover(void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx;
+ struct mlx5e_priv *priv;
+ struct mlx5_eq_comp *eq;
+ struct mlx5e_txqsq *sq;
+ int err;
+
+ to_ctx = ctx;
+ sq = to_ctx->sq;
+ eq = sq->cq.mcq.eq;
+ priv = sq->priv;
+ err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
+ if (!err) {
+ to_ctx->status = 0; /* this sq recovered */
+ return err;
+ }
+
+ err = mlx5e_safe_reopen_channels(priv);
+ if (!err) {
+ to_ctx->status = 1; /* all channels recovered */
+ return err;
+ }
+
+ to_ctx->status = err;
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ netdev_err(priv->netdev,
+ "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
+ err);
+
+ return err;
+}
+
+/* The state lock cannot be taken within this function.
+ * Doing so can cause a deadlock or a read-after-free.
+ */
+static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
+{
+ return err_ctx->recover(err_ctx->ctx);
+}
+
+static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
+ void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
+ mlx5e_health_recover_channels(priv);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *sq, int tc)
+{
+ bool stopped = netif_xmit_stopped(sq->txq);
+ struct mlx5e_priv *priv = sq->priv;
+ u8 state;
+ int err;
+
+ err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *sq, int tc)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
+ struct mlx5e_ptpsq *ptpsq, int tc)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int
+mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *txqsq)
+{
+ u32 sq_stride, sq_sz;
+ bool real_time;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
+ if (err)
+ return err;
+
+ real_time = mlx5_is_real_time_sq(txqsq->mdev);
+ sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
+ sq_stride = MLX5_SEND_WQE_BB;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
+ struct mlx5e_ptpsq *ptpsq)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5e_ptpsq *generic_ptpsq;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
+ if (err)
+ return err;
+
+ if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
+ goto out;
+
+ generic_ptpsq = &ptp_ch->ptpsq[0];
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+out:
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ int i, tc, err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
+ if (err)
+ goto unlock;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+ if (err)
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &c->sq[tc];
+
+ err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
+ if (err)
+ goto unlock;
+ }
+ }
+
+ if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
+ goto close_sqs_nest;
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
+ &ptp_ch->ptpsq[tc],
+ tc);
+ if (err)
+ goto unlock;
+ }
+
+close_sqs_nest:
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ goto unlock;
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5_rsc_key key = {};
+ struct mlx5e_txqsq *sq = ctx;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = sq->sqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
+
+ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
+}
+
+static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5_rsc_key key = {};
+ int i, tc, err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+ if (err)
+ return err;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &c->sq[tc];
+
+ err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
+ if (err)
+ return err;
+ }
+ }
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
+
+ err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
+ if (err)
+ return err;
+ }
+ }
+
+ return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
+ struct mlx5e_err_ctx *err_ctx,
+ struct devlink_fmsg *fmsg)
+{
+ return err_ctx->dump(priv, fmsg, err_ctx->ctx);
+}
+
+static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
+ mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
+}
+
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = sq;
+ err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+ err_ctx.dump = mlx5e_tx_reporter_dump_sq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
+
+ mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+}
+
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_tx_timeout_ctx to_ctx = {};
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ to_ctx.sq = sq;
+ err_ctx.ctx = &to_ctx;
+ err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
+ snprintf(err_str, sizeof(err_str),
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
+ sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+ jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
+
+ mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+ return to_ctx.status;
+}
+
+static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
+ .name = "tx",
+ .recover = mlx5e_tx_reporter_recover,
+ .diagnose = mlx5e_tx_reporter_diagnose,
+ .dump = mlx5e_tx_reporter_dump,
+};
+
+#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
+
+void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops,
+ MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
+ if (IS_ERR(reporter)) {
+ netdev_warn(priv->netdev,
+ "Failed to create tx reporter, err = %ld\n",
+ PTR_ERR(reporter));
+ return;
+ }
+ priv->tx_reporter = reporter;
+}
+
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
+{
+ if (!priv->tx_reporter)
+ return;
+
+ devlink_port_health_reporter_destroy(priv->tx_reporter);
+ priv->tx_reporter = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
new file mode 100644
index 000000000..b915fb29d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rqt.h"
+#include <linux/mlx5/transobj.h>
+
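+/* Spread the indirection table uniformly (round-robin) over the channels. */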
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir->table[i] = i % num_channels;
+}
+
+static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u16 max_size, u32 *init_rqns, u16 init_size)
+{
+ void *rqtc;
+ int inlen;
+ int err;
+ u32 *in;
+ int i;
+
+ rqt->mdev = mdev;
+ rqt->size = max_size;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size);
+ for (i = 0; i < init_size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]);
+
+ err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn)
+{
+ u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1;
+
+ return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1);
+}
+
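+/* Reverse the lowest 'size' bits of 'a'; used when building the indirection
+ * table for the XOR hash function.
+ */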
+static int mlx5e_bits_invert(unsigned long a, int size)
+{
+ int inv = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+ return inv;
+}
+
+static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) {
+ unsigned int ix = i;
+
+ if (hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE));
+
+ ix = indir->table[ix];
+
+ if (WARN_ON(ix >= num_rqns))
+ /* Could be a bug in the driver or in the kernel part of
+ * ethtool: indir table refers to non-existent RQs.
+ */
+ return -EINVAL;
+ rss_rqns[i] = rqns[ix];
+ }
+
+ return 0;
+}
+
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
+
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
+{
+ mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
+}
+
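+/* Repoint an existing RQT at a new list of RQs via the MODIFY_RQT command. */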
+static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size)
+{
+ unsigned int i;
+ void *rqtc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+
+ MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, size);
+ for (i = 0; i < size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]);
+
+ err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn)
+{
+ return mlx5e_rqt_redirect(rqt, &rqn, 1);
+}
+
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE))
+ return -EINVAL;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
new file mode 100644
index 000000000..60c985a12
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RQT_H__
+#define __MLX5_EN_RQT_H__
+
+#include <linux/kernel.h>
+
+#define MLX5E_INDIR_RQT_SIZE (1 << 8)
+
+struct mlx5_core_dev;
+
+struct mlx5e_rss_params_indir {
+ u32 table[MLX5E_INDIR_RQT_SIZE];
+};
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels);
+
+struct mlx5e_rqt {
+ struct mlx5_core_dev *mdev;
+ u32 rqtn;
+ u16 size;
+};
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn);
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt);
+
+static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
+{
+ return rqt->rqtn;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn);
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+
+#endif /* __MLX5_EN_RQT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
new file mode 100644
index 000000000..7f93426b8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.
+
+#include "rss.h"
+
+#define mlx5e_rss_warn(__dev, format, ...) \
+ dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+ [MLX5_TT_IPV6] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+};
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
+{
+ return rss_default_config[tt];
+}
+
+struct mlx5e_rss {
+ struct mlx5e_rss_params_hash hash;
+ struct mlx5e_rss_params_indir indir;
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_rqt rqt;
+ struct mlx5_core_dev *mdev;
+ u32 drop_rqn;
+ bool inner_ft_support;
+ bool enabled;
+ refcount_t refcnt;
+};
+
+struct mlx5e_rss *mlx5e_rss_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_rss), GFP_KERNEL);
+}
+
+void mlx5e_rss_free(struct mlx5e_rss *rss)
+{
+ kvfree(rss);
+}
+
+static void mlx5e_rss_params_init(struct mlx5e_rss *rss)
+{
+ enum mlx5_traffic_types tt;
+
+ rss->hash.hfunc = ETH_RSS_HASH_TOP;
+ netdev_rss_key_fill(rss->hash.toeplitz_hash_key,
+ sizeof(rss->hash.toeplitz_hash_key));
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ rss->rx_hash_fields[tt] =
+ mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
+}
+
+static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ return inner ? &rss->inner_tir[tt] : &rss->tir[tt];
+}
+
+static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ return *rss_get_tirp(rss, tt, inner);
+}
+
+static struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+
+ rss_tt = mlx5e_rss_get_default_tt_config(tt);
+ rss_tt.rx_hash_fields = rss->rx_hash_fields[tt];
+ return rss_tt;
+}
+
+static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+ struct mlx5e_tir_builder *builder;
+ struct mlx5e_tir **tir_p;
+ struct mlx5e_tir *tir;
+ u32 rqtn;
+ int err;
+
+ if (inner && !rss->inner_ft_support) {
+ mlx5e_rss_warn(rss->mdev,
+ "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n",
+ tt);
+ return -EINVAL;
+ }
+
+ tir_p = rss_get_tirp(rss, tt, inner);
+ if (*tir_p)
+ return -EINVAL;
+
+ tir = kvzalloc(sizeof(*tir), GFP_KERNEL);
+ if (!tir)
+ return -ENOMEM;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder) {
+ err = -ENOMEM;
+ goto free_tir;
+ }
+
+ rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
+ mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
+ rqtn, rss->inner_ft_support);
+ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+ mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+
+ err = mlx5e_tir_init(tir, builder, rss->mdev, true);
+ mlx5e_tir_builder_free(builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n",
+ inner ? "inner " : "", err, tt);
+ goto free_tir;
+ }
+
+ *tir_p = tir;
+ return 0;
+
+free_tir:
+ kvfree(tir);
+ return err;
+}
+
+static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_tir **tir_p;
+ struct mlx5e_tir *tir;
+
+ tir_p = rss_get_tirp(rss, tt, inner);
+ if (!*tir_p)
+ return;
+
+ tir = *tir_p;
+ mlx5e_tir_destroy(tir);
+ kvfree(tir);
+ *tir_p = NULL;
+}
+
+static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+{
+ enum mlx5_traffic_types tt, max_tt;
+ int err;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ goto err_destroy_tirs;
+ }
+
+ return 0;
+
+err_destroy_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_rss_destroy_tir(rss, tt, inner);
+ return err;
+}
+
+static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner)
+{
+ enum mlx5_traffic_types tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5e_rss_destroy_tir(rss, tt, inner);
+}
+
+static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+ struct mlx5e_tir_builder *builder;
+ struct mlx5e_tir *tir;
+ int err;
+
+ tir = rss_get_tir(rss, tt, inner);
+ if (!tir)
+ return 0;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+
+ mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+ err = mlx5e_tir_modify(tir, builder);
+
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss)
+{
+ enum mlx5_traffic_types tt;
+ int err, retval;
+
+ retval = 0;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_rss_update_tir(rss, tt, false);
+ if (err) {
+ retval = retval ? : err;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ }
+
+ if (!rss->inner_ft_support)
+ continue;
+
+ err = mlx5e_rss_update_tir(rss, tt, true);
+ if (err) {
+ retval = retval ? : err;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ }
+ }
+ return retval;
+}
+
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn)
+{
+ rss->mdev = mdev;
+ rss->inner_ft_support = inner_ft_support;
+ rss->drop_rqn = drop_rqn;
+
+ mlx5e_rss_params_init(rss);
+ refcount_set(&rss->refcnt, 1);
+
+ return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn);
+}
+
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+{
+ int err;
+
+ err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn);
+ if (err)
+ goto err_out;
+
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
+ if (err)
+ goto err_destroy_rqt;
+
+ if (inner_ft_support) {
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
+ if (err)
+ goto err_destroy_tirs;
+ }
+
+ return 0;
+
+err_destroy_tirs:
+ mlx5e_rss_destroy_tirs(rss, false);
+err_destroy_rqt:
+ mlx5e_rqt_destroy(&rss->rqt);
+err_out:
+ return err;
+}
+
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss)
+{
+ if (!refcount_dec_if_one(&rss->refcnt))
+ return -EBUSY;
+
+ mlx5e_rss_destroy_tirs(rss, false);
+
+ if (rss->inner_ft_support)
+ mlx5e_rss_destroy_tirs(rss, true);
+
+ mlx5e_rqt_destroy(&rss->rqt);
+
+ return 0;
+}
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss)
+{
+ refcount_inc(&rss->refcnt);
+}
+
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss)
+{
+ refcount_dec(&rss->refcnt);
+}
+
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss)
+{
+ return refcount_read(&rss->refcnt);
+}
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_tir *tir;
+
+ WARN_ON(inner && !rss->inner_ft_support);
+ tir = rss_get_tir(rss, tt, inner);
+ WARN_ON(!tir);
+
+ return mlx5e_tir_get_tirn(tir);
+}
+
+/* Fill the "tirn" output parameter.
+ * Create the requested TIR if this is its first use.
+ */
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn)
+{
+ struct mlx5e_tir *tir;
+
+ tir = rss_get_tir(rss, tt, inner);
+ if (!tir) { /* TIR doesn't exist, create one */
+ int err;
+
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ return err;
+ tir = rss_get_tir(rss, tt, inner);
+ }
+
+ *tirn = mlx5e_tir_get_tirn(tir);
+ return 0;
+}
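+
+/* Illustrative calling pattern (a sketch, not taken from the surrounding code):
+ * a consumer that only needs the TIR number can rely on the lazy creation
+ * above. The traffic type and packet-merge parameter below are placeholders.
+ *
+ *   u32 tirn;
+ *   int err;
+ *
+ *   err = mlx5e_rss_obtain_tirn(rss, MLX5_TT_IPV4_TCP,
+ *                               &pkt_merge_param, false, &tirn);
+ *   if (err)
+ *           return err;
+ *   // tirn can now be used as a flow-steering rule destination
+ */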
+
+static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+ int err;
+
+ err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir);
+ if (err)
+ mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n",
+ mlx5e_rqt_get_rqtn(&rss->rqt), err);
+ return err;
+}
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+ rss->enabled = true;
+ mlx5e_rss_apply(rss, rqns, num_rqns);
+}
+
+void mlx5e_rss_disable(struct mlx5e_rss *rss)
+{
+ int err;
+
+ rss->enabled = false;
+ err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn);
+ if (err)
+ mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n",
+ mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
+}
+
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ struct mlx5e_tir_builder *builder;
+ enum mlx5_traffic_types tt;
+ int err, final_err;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_tir *tir;
+
+ tir = rss_get_tir(rss, tt, false);
+ if (!tir)
+ goto inner_tir;
+ err = mlx5e_tir_modify(tir, builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+
+inner_tir:
+ if (!rss->inner_ft_support)
+ continue;
+
+ tir = rss_get_tir(rss, tt, true);
+ if (!tir)
+ continue;
+ err = mlx5e_tir_modify(tir, builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+}
+
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
+{
+ unsigned int i;
+
+ if (indir)
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir[i] = rss->indir.table[i];
+
+ if (key)
+ memcpy(key, rss->hash.toeplitz_hash_key,
+ sizeof(rss->hash.toeplitz_hash_key));
+
+ if (hfunc)
+ *hfunc = rss->hash.hfunc;
+
+ return 0;
+}
+
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+ u32 *rqns, unsigned int num_rqns)
+{
+ bool changed_indir = false;
+ bool changed_hash = false;
+ struct mlx5e_rss *old_rss;
+ int err = 0;
+
+ old_rss = mlx5e_rss_alloc();
+ if (!old_rss)
+ return -ENOMEM;
+
+ *old_rss = *rss;
+
+ if (hfunc && *hfunc != rss->hash.hfunc) {
+ switch (*hfunc) {
+ case ETH_RSS_HASH_XOR:
+ case ETH_RSS_HASH_TOP:
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ changed_hash = true;
+ changed_indir = true;
+ rss->hash.hfunc = *hfunc;
+ }
+
+ if (key) {
+ if (rss->hash.hfunc == ETH_RSS_HASH_TOP)
+ changed_hash = true;
+ memcpy(rss->hash.toeplitz_hash_key, key,
+ sizeof(rss->hash.toeplitz_hash_key));
+ }
+
+ if (indir) {
+ unsigned int i;
+
+ changed_indir = true;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ rss->indir.table[i] = indir[i];
+ }
+
+ if (changed_indir && rss->enabled) {
+ err = mlx5e_rss_apply(rss, rqns, num_rqns);
+ if (err) {
+ *rss = *old_rss;
+ goto out;
+ }
+ }
+
+ if (changed_hash)
+ mlx5e_rss_update_tirs(rss);
+
+out:
+ mlx5e_rss_free(old_rss);
+ return err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss)
+{
+ return rss->hash;
+}
+
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+ return rss->rx_hash_fields[tt];
+}
+
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields)
+{
+ u8 old_rx_hash_fields;
+ int err;
+
+ old_rx_hash_fields = rss->rx_hash_fields[tt];
+
+ if (old_rx_hash_fields == rx_hash_fields)
+ return 0;
+
+ rss->rx_hash_fields[tt] = rx_hash_fields;
+
+ err = mlx5e_rss_update_tir(rss, tt, false);
+ if (err) {
+ rss->rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ return err;
+ }
+
+ if (!(rss->inner_ft_support))
+ return 0;
+
+ err = mlx5e_rss_update_tir(rss, tt, true);
+ if (err) {
+ /* Partial update happened. Try to revert - it may fail too, but
+ * there is nothing more we can do.
+ */
+ rss->rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ if (mlx5e_rss_update_tir(rss, tt, false))
+ mlx5e_rss_warn(rss->mdev,
+ "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+ tt);
+ }
+
+ return err;
+}
+
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch)
+{
+ mlx5e_rss_params_indir_init_uniform(&rss->indir, nch);
+}
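+
+/* Lifecycle sketch of a struct mlx5e_rss as suggested by the API above
+ * (illustrative only; mdev, drop_rqn, rqns and num_channels are placeholders):
+ *
+ *   rss = mlx5e_rss_alloc();
+ *   err = mlx5e_rss_init(rss, mdev, false, drop_rqn, &pkt_merge_param);
+ *   mlx5e_rss_set_indir_uniform(rss, num_channels);
+ *   mlx5e_rss_enable(rss, rqns, num_channels);   // RQT -> channel RQs
+ *   ...
+ *   mlx5e_rss_disable(rss);                      // RQT -> drop RQ
+ *   mlx5e_rss_cleanup(rss);                      // -EBUSY if still referenced
+ *   mlx5e_rss_free(rss);
+ */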
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
new file mode 100644
index 000000000..c6b216416
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_RSS_H__
+#define __MLX5_EN_RSS_H__
+
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+struct mlx5e_rss;
+
+struct mlx5e_rss *mlx5e_rss_alloc(void);
+void mlx5e_rss_free(struct mlx5e_rss *rss);
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param);
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn);
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss);
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss);
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss);
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner);
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn);
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+void mlx5e_rss_disable(struct mlx5e_rss *rss);
+
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+ u32 *rqns, unsigned int num_rqns);
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss);
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch);
+#endif /* __MLX5_EN_RSS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
new file mode 100644
index 000000000..e1095bc36
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rx_res.h"
+#include "channels.h"
+#include "params.h"
+
+#define MLX5E_MAX_NUM_RSS 16
+
+struct mlx5e_rx_res {
+ struct mlx5_core_dev *mdev;
+ enum mlx5e_rx_res_features features;
+ unsigned int max_nch;
+ u32 drop_rqn;
+
+ struct mlx5e_packet_merge_param pkt_merge_param;
+ struct rw_semaphore pkt_merge_param_sem;
+
+ struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
+ bool rss_active;
+ u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+ unsigned int rss_nch;
+
+ struct {
+ struct mlx5e_rqt direct_rqt;
+ struct mlx5e_tir direct_tir;
+ } *channels;
+
+ struct {
+ struct mlx5e_rqt rqt;
+ struct mlx5e_tir tir;
+ } ptp;
+};
+
+/* API for rx_res_rss_* */
+
+static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+ unsigned int init_nch)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_rss *rss;
+ int err;
+
+ if (WARN_ON(res->rss[0]))
+ return -EINVAL;
+
+ rss = mlx5e_rss_alloc();
+ if (!rss)
+ return -ENOMEM;
+
+ err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
+ &res->pkt_merge_param);
+ if (err)
+ goto err_rss_free;
+
+ mlx5e_rss_set_indir_uniform(rss, init_nch);
+
+ res->rss[0] = rss;
+
+ return 0;
+
+err_rss_free:
+ mlx5e_rss_free(rss);
+ return err;
+}
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_rss *rss;
+ int err, i;
+
+ for (i = 1; i < MLX5E_MAX_NUM_RSS; i++)
+ if (!res->rss[i])
+ break;
+
+ if (i == MLX5E_MAX_NUM_RSS)
+ return -ENOSPC;
+
+ rss = mlx5e_rss_alloc();
+ if (!rss)
+ return -ENOMEM;
+
+ err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn);
+ if (err)
+ goto err_rss_free;
+
+ mlx5e_rss_set_indir_uniform(rss, init_nch);
+ if (res->rss_active)
+ mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+
+ res->rss[i] = rss;
+ *rss_idx = i;
+
+ return 0;
+
+err_rss_free:
+ mlx5e_rss_free(rss);
+ return err;
+}
+
+static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ struct mlx5e_rss *rss = res->rss[rss_idx];
+ int err;
+
+ err = mlx5e_rss_cleanup(rss);
+ if (err)
+ return err;
+
+ mlx5e_rss_free(rss);
+ res->rss[rss_idx] = NULL;
+
+ return 0;
+}
+
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -EINVAL;
+
+ return __mlx5e_rx_res_rss_destroy(res, rss_idx);
+}
+
+static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+ int err;
+
+ if (!rss)
+ continue;
+
+ err = __mlx5e_rx_res_rss_destroy(res, i);
+ if (err) {
+ unsigned int refcount;
+
+ refcount = mlx5e_rss_refcnt_read(rss);
+ mlx5_core_warn(res->mdev,
+ "Failed to destroy RSS context %d, refcount = %u, err = %d\n",
+ i, refcount, err);
+ }
+ }
+}
+
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ res->rss_active = true;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+
+ if (!rss)
+ continue;
+ mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+ }
+}
+
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ res->rss_active = false;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+
+ if (!rss)
+ continue;
+ mlx5e_rss_disable(rss);
+ }
+}
+
+/* Updates the indirection table SW shadow, does not update the HW resources yet */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+ WARN_ON_ONCE(res->rss_active);
+ mlx5e_rss_set_indir_uniform(res->rss[0], nch);
+}
+
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u32 *indir, u8 *key, u8 *hfunc)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -ENOENT;
+
+ return mlx5e_rss_get_rxfh(rss, indir, key, hfunc);
+}
+
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ const u32 *indir, const u8 *key, const u8 *hfunc)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -ENOENT;
+
+ return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch);
+}
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_hash_fields(rss, tt);
+}
+
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields);
+}
+
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res)
+{
+ int i, cnt;
+
+ cnt = 0;
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+ if (res->rss[i])
+ cnt++;
+
+ return cnt;
+}
+
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss)
+{
+ int i;
+
+ if (!rss)
+ return -EINVAL;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+ if (rss == res->rss[i])
+ return i;
+
+ return -ENOENT;
+}
+
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return NULL;
+
+ return res->rss[rss_idx];
+}
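+
+/* Sketch of how the rx_res_rss_* API above is expected to be used for an
+ * additional RSS context (illustrative; rss_idx, indir, key, hfunc and nch
+ * are placeholders). Context 0 is the default one created at init time:
+ *
+ *   err = mlx5e_rx_res_rss_init(res, &rss_idx, nch);
+ *   err = mlx5e_rx_res_rss_set_rxfh(res, rss_idx, indir, key, &hfunc);
+ *   ...
+ *   mlx5e_rx_res_rss_destroy(res, rss_idx);
+ */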
+
+/* End of API rx_res_rss_* */
+
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
+}
+
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+ int ix;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+ if (!res->channels) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+ res->mdev, false, res->drop_rqn);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_rqts;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ goto out;
+
+err_destroy_direct_tirs:
+ while (--ix >= 0)
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+ ix = res->max_nch;
+err_destroy_direct_rqts:
+ while (--ix >= 0)
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+ kvfree(res->channels);
+
+out:
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+ if (err)
+ goto out;
+
+ /* Separated from the channels RQs, does not share pkt_merge state with them */
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+ if (err)
+ goto err_destroy_ptp_rqt;
+
+ goto out;
+
+err_destroy_ptp_rqt:
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+ }
+
+ kvfree(res->channels);
+}
+
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_tir_destroy(&res->ptp.tir);
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch)
+{
+ int err;
+
+ res->mdev = mdev;
+ res->features = features;
+ res->max_nch = max_nch;
+ res->drop_rqn = drop_rqn;
+
+ res->pkt_merge_param = *init_pkt_merge_param;
+ init_rwsem(&res->pkt_merge_param_sem);
+
+ err = mlx5e_rx_res_rss_init_def(res, init_nch);
+ if (err)
+ goto err_out;
+
+ err = mlx5e_rx_res_channels_init(res);
+ if (err)
+ goto err_rss_destroy;
+
+ err = mlx5e_rx_res_ptp_init(res);
+ if (err)
+ goto err_channels_destroy;
+
+ return 0;
+
+err_channels_destroy:
+ mlx5e_rx_res_channels_destroy(res);
+err_rss_destroy:
+ __mlx5e_rx_res_rss_destroy(res, 0);
+err_out:
+ return err;
+}
+
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_rx_res_ptp_destroy(res);
+ mlx5e_rx_res_channels_destroy(res);
+ mlx5e_rx_res_rss_destroy_all(res);
+}
+
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+ kvfree(res);
+}
+
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_tirn(rss, tt, false);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_tirn(rss, tt, true);
+}
+
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+ WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+ return mlx5e_tir_get_tirn(&res->ptp.tir);
+}
+
+static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+}
+
+static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
+ struct mlx5e_channels *chs,
+ unsigned int ix)
+{
+ u32 rqn = res->rss_rqns[ix];
+ int err;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ rqn, ix, err);
+}
+
+static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
+ unsigned int ix)
+{
+ int err;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ res->drop_rqn, ix, err);
+}
+
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+ unsigned int nch, ix;
+ int err;
+
+ nch = mlx5e_channels_get_num(chs);
+
+ for (ix = 0; ix < chs->num; ix++) {
+ if (mlx5e_channels_is_xsk(chs, ix))
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+ }
+ res->rss_nch = chs->num;
+
+ mlx5e_rx_res_rss_enable(res);
+
+ for (ix = 0; ix < nch; ix++)
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+ for (ix = nch; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ u32 rqn;
+
+ if (!mlx5e_channels_get_ptp_rqn(chs, &rqn))
+ rqn = res->drop_rqn;
+
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ rqn, err);
+ }
+}
+
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+ int err;
+
+ mlx5e_rx_res_rss_disable(res);
+
+ for (ix = 0; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ res->drop_rqn, err);
+ }
+}
+
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk)
+{
+ if (xsk)
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+
+ mlx5e_rx_res_rss_enable(res);
+
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+}
+
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ struct mlx5e_tir_builder *builder;
+ int err, final_err;
+ unsigned int ix;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ down_write(&res->pkt_merge_param_sem);
+ res->pkt_merge_param = *pkt_merge_param;
+
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+ for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) {
+ struct mlx5e_rss *rss = res->rss[ix];
+
+ if (!rss)
+ continue;
+
+ err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
+ if (err)
+ final_err = final_err ? : err;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
+ mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ up_write(&res->pkt_merge_param_sem);
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+ return mlx5e_rss_get_hash(res->rss[0]);
+}
+
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+ struct mlx5e_tir *tir)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ u32 rqtn;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq);
+
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn,
+ inner_ft_support);
+ mlx5e_tir_builder_build_direct(builder);
+ mlx5e_tir_builder_build_tls(builder);
+ down_read(&res->pkt_merge_param_sem);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
+ err = mlx5e_tir_init(tir, builder, res->mdev, false);
+ up_read(&res->pkt_merge_param_sem);
+
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
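+
+/* Lifecycle sketch of the RX resources object as suggested by the API above
+ * (illustrative; the surrounding variables are placeholders):
+ *
+ *   res = mlx5e_rx_res_alloc();
+ *   err = mlx5e_rx_res_init(res, mdev, features, max_nch, drop_rqn,
+ *                           &pkt_merge_param, init_nch);
+ *   ...
+ *   mlx5e_rx_res_channels_activate(res, chs);    // when channels are opened
+ *   mlx5e_rx_res_channels_deactivate(res);       // when channels are closed
+ *   ...
+ *   mlx5e_rx_res_destroy(res);
+ *   mlx5e_rx_res_free(res);
+ */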
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
new file mode 100644
index 000000000..5d5f64fab
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RX_RES_H__
+#define __MLX5_EN_RX_RES_H__
+
+#include <linux/kernel.h>
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+#include "rss.h"
+
+struct mlx5e_rx_res;
+
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
+
+enum mlx5e_rx_res_features {
+ MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
+ MLX5E_RX_RES_FEATURE_PTP = BIT(1),
+};
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk);
+
+/* Configuration API */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ const u32 *indir, const u8 *key, const u8 *hfunc);
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss);
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
+
+/* Accel TIRs */
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+ struct mlx5e_tir *tir);
+#endif /* __MLX5_EN_RX_RES_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
new file mode 100644
index 000000000..f675b1926
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "selq.h"
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include "en.h"
+#include "en/ptp.h"
+#include "en/htb.h"
+
+struct mlx5e_selq_params {
+ unsigned int num_regular_queues;
+ unsigned int num_channels;
+ unsigned int num_tcs;
+ union {
+ u8 is_special_queues;
+ struct {
+ bool is_htb : 1;
+ bool is_ptp : 1;
+ };
+ };
+ u16 htb_maj_id;
+ u16 htb_defcls;
+};
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
+{
+ struct mlx5e_selq_params *init_params;
+
+ selq->state_lock = state_lock;
+
+ selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL);
+ if (!selq->standby)
+ return -ENOMEM;
+
+ init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL);
+ if (!init_params) {
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ return -ENOMEM;
+ }
+ /* Assign dummy values, so that mlx5e_select_queue won't crash. */
+ *init_params = (struct mlx5e_selq_params) {
+ .num_regular_queues = 1,
+ .num_channels = 1,
+ .num_tcs = 1,
+ .is_htb = false,
+ .is_ptp = false,
+ .htb_maj_id = 0,
+ .htb_defcls = 0,
+ };
+ rcu_assign_pointer(selq->active, init_params);
+
+ return 0;
+}
+
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+{
+ WARN_ON_ONCE(selq->is_prepared);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ selq->is_prepared = true;
+
+ mlx5e_selq_apply(selq);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+}
+
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->num_channels = params->num_channels;
+ selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params);
+ selq->standby->num_regular_queues =
+ selq->standby->num_channels * selq->standby->num_tcs;
+ selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS);
+}
+
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *selq_active =
+ rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock));
+
+ return selq_active->htb_maj_id;
+}
+
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->is_htb = htb_maj_id;
+ selq->standby->htb_maj_id = htb_maj_id;
+ selq->standby->htb_defcls = htb_defcls;
+}
+
+void mlx5e_selq_apply(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *old_params;
+
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+
+ old_params = rcu_replace_pointer(selq->active, selq->standby,
+ lockdep_is_held(selq->state_lock));
+ synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */
+ selq->standby = old_params;
+}
+
+void mlx5e_selq_cancel(struct mlx5e_selq *selq)
+{
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+}
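+
+/* The prepare/apply/cancel trio above implements a double-buffered, RCU
+ * published update of the selection parameters. A sketch of the expected
+ * calling pattern (do_switch_channels() is a hypothetical stand-in for the
+ * caller's reconfiguration step, new_params is a placeholder):
+ *
+ *   mutex_lock(&priv->state_lock);
+ *   mlx5e_selq_prepare_params(&priv->selq, &new_params);
+ *   err = do_switch_channels(priv);
+ *   if (err)
+ *           mlx5e_selq_cancel(&priv->selq);
+ *   else
+ *           mlx5e_selq_apply(&priv->selq);
+ *   mutex_unlock(&priv->state_lock);
+ */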
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP)
+ return mlx5e_get_dscp_up(priv, skb);
+#endif
+ if (skb_vlan_tag_present(skb))
+ return skb_vlan_tag_get_prio(skb);
+ return 0;
+}
+
+static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int up;
+
+ up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0;
+
+ return selq->num_regular_queues + up;
+}
+
+static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ u16 classid;
+
+ /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
+ if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id)
+ classid = TC_H_MIN(skb->priority);
+ else
+ classid = selq->htb_defcls;
+
+ if (!classid)
+ return 0;
+
+ return mlx5e_htb_get_txq_by_classid(priv->htb, classid);
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_selq_params *selq;
+ int txq_ix, up;
+
+ selq = rcu_dereference_bh(priv->selq.active);
+
+ /* This is a workaround needed only for the mlx5e_netdev_change_profile
+ * flow that zeroes out the whole priv without unregistering the netdev
+ * and without preventing ndo_select_queue from being called.
+ */
+ if (unlikely(!selq))
+ return 0;
+
+ if (likely(!selq->is_special_queues)) {
+ /* No special queues, netdev_pick_tx returns one of the regular ones. */
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ /* Normalize any picked txq_ix to [0, num_channels),
+ * so that we can return a txq_ix that matches both the
+ * channel and the packet UP.
+ */
+ return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
+ up * selq->num_channels;
+ }
+
+ if (unlikely(selq->htb_maj_id)) {
+ /* HTB forces num_tcs == 1, so the PTP SQ (if any) is txq num_channels. */
+
+ txq_ix = mlx5e_select_htb_queue(priv, skb, selq);
+ if (txq_ix > 0)
+ return txq_ix;
+
+ if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb)))
+ return selq->num_channels;
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* netdev_pick_tx() must not choose the ptp_channel or HTB txqs.
+ * If one of them was picked, remap it to a regular queue: the
+ * driver selects those queues only in mlx5e_select_ptpsq()
+ * and mlx5e_select_htb_queue().
+ */
+ return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
+ }
+
+ /* PTP is enabled */
+
+ if (mlx5e_use_ptpsq(skb))
+ return mlx5e_select_ptpsq(dev, skb, selq);
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Normalize any picked txq_ix to [0, num_channels). Queues in range
+ * [0, num_regular_queues) will be mapped to the corresponding channel
+ * index, so that we can apply the packet's UP (if num_tcs > 1).
+ * If netdev_pick_tx() picks ptp_channel, switch to a regular queue,
+ * because driver should select the PTP only at mlx5e_select_ptpsq().
+ */
+ txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ return txq_ix + up * selq->num_channels;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
new file mode 100644
index 000000000..fd590f80e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_SELQ_H__
+#define __MLX5_EN_SELQ_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_selq_params;
+
+struct mlx5e_selq {
+ struct mlx5e_selq_params __rcu *active;
+ struct mlx5e_selq_params *standby;
+ struct mutex *state_lock; /* points to priv->state_lock */
+ bool is_prepared;
+};
+
+struct mlx5e_params;
+struct net_device;
+struct sk_buff;
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock);
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq);
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params);
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls);
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq);
+void mlx5e_selq_apply(struct mlx5e_selq *selq);
+void mlx5e_selq_cancel(struct mlx5e_selq *selq);
+
+static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
+{
+ while (unlikely(txq >= num_channels))
+ txq -= num_channels;
+ return txq;
+}
+
+static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
+{
+ if (unlikely(txq >= num_channels)) {
+ if (unlikely(txq >= num_channels << 3))
+ txq %= num_channels;
+ else
+ do
+ txq -= num_channels;
+ while (txq >= num_channels);
+ }
+ return txq;
+}
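+
+/* Worked example for the two helpers above (illustrative): with
+ * num_channels = 4 and num_tcs = 2, the regular txqs are 0..7 and
+ * txq = channel + tc * num_channels. If netdev_pick_tx() returns 6,
+ * mlx5e_txq_to_ch_ix(6, 4) folds it back to channel 2, and the caller
+ * re-applies the packet UP, e.g. 2 + 1 * 4 = 6 for UP 1. The _htb variant
+ * additionally tolerates txq numbers far above the regular range (HTB
+ * qdisc queues) and falls back to a plain modulo once
+ * txq >= 8 * num_channels.
+ */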
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+
+#endif /* __MLX5_EN_SELQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
new file mode 100644
index 000000000..21aab9635
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_ACCEPT;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_accept = {
+ .can_offload = tc_act_can_offload_accept,
+ .parse_action = tc_act_parse_accept,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
new file mode 100644
index 000000000..3337241cf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc/post_act.h"
+#include "en/tc_priv.h"
+#include "mlx5_core.h"
+
+static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress,
+ [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle,
+ [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap,
+ [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype,
+ [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample,
+ [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+ [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push,
+ [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop,
+ [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan,
+};
+
+static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+};
+
+/**
+ * mlx5e_tc_act_get() - Get an action parser for an action id.
+ * @act_id: Flow action id.
+ * @ns_type: flow namespace type.
+ */
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5e_tc_act **tc_acts;
+
+ tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic;
+
+ return tc_acts[act_id];
+}
+
+/**
+ * mlx5e_tc_act_init_parse_state() - Init a new parse_state.
+ * @parse_state: Parsing state.
+ * @flow: mlx5e tc flow being handled.
+ * @flow_action: flow action to parse.
+ * @extack: to set an error msg.
+ *
+ * The same parse_state should be passed to action parsers
+ * for tracking the current parsing state.
+ */
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ memset(parse_state, 0, sizeof(*parse_state));
+ parse_state->flow = flow;
+ parse_state->extack = extack;
+ parse_state->flow_action = flow_action;
+}
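+
+/* Sketch of how the parse state and the per-action callbacks are expected to
+ * be driven (illustrative, a simplified stand-in for the real caller in
+ * en_tc.c):
+ *
+ *   mlx5e_tc_act_init_parse_state(&parse_state, flow, flow_action, extack);
+ *   flow_action_for_each(i, act, flow_action) {
+ *           tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ *           if (!tc_act || !tc_act->can_offload(&parse_state, act, i, attr))
+ *                   return -EOPNOTSUPP;
+ *           err = tc_act->parse_action(&parse_state, act, priv, attr);
+ *           if (err)
+ *                   return err;
+ *   }
+ *   err = mlx5e_tc_act_post_parse(&parse_state, flow_action, attr, ns_type);
+ */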
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder)
+{
+ struct flow_action_entry *act;
+ int i, j = 0;
+
+ flow_action_for_each(i, act, flow_action) {
+ /* Add CT action to be first. */
+ if (act->id == FLOW_ACTION_CT)
+ flow_action_reorder->entries[j++] = act;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT)
+ continue;
+ flow_action_reorder->entries[j++] = act;
+ }
+}
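+
+/* Example of the reordering above (illustrative): an input action list of
+ * [pedit, ct, mirred] is emitted as [ct, pedit, mirred]. The two passes copy
+ * the CT entries first and everything else afterwards, preserving the
+ * relative order within each group.
+ */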
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct flow_action_entry *act;
+ struct mlx5e_tc_act *tc_act;
+ struct mlx5e_priv *priv;
+ int err = 0, i;
+
+ priv = parse_state->flow->priv;
+
+ flow_action_for_each(i, act, flow_action) {
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act || !tc_act->post_parse)
+ continue;
+
+ err = tc_act->post_parse(parse_state, priv, attr);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr)
+{
+ struct mlx5_core_dev *mdev = flow->priv->mdev;
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ int err;
+
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+
+ /* Set handle on current post act rule to next post act rule. */
+ err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts);
+ if (err) {
+ mlx5_core_warn(mdev, "Failed setting post action handle");
+ return err;
+ }
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
new file mode 100644
index 000000000..e1570ff05
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_H__
+#define __MLX5_EN_TC_ACT_H__
+
+#include <net/tc_act/tc_pedit.h>
+#include <net/flow_offload.h>
+#include <linux/netlink.h>
+#include "eswitch.h"
+#include "pedit.h"
+
+struct mlx5_flow_attr;
+
+struct mlx5e_tc_act_parse_state {
+ struct flow_action *flow_action;
+ struct mlx5e_tc_flow *flow;
+ struct netlink_ext_ack *extack;
+ u32 actions;
+ bool ct;
+ bool ct_clear;
+ bool encap;
+ bool decap;
+ bool mpls_push;
+ bool eth_push;
+ bool eth_pop;
+ bool ptype_host;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
+ int if_count;
+ struct mlx5_tc_ct_priv *ct_priv;
+};
+
+struct mlx5e_tc_act {
+ bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr);
+
+ int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ bool (*is_multi_table_act)(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr);
+
+ int (*offload_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act);
+
+ int (*destroy_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+
+ int (*stats_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+};
+
+struct mlx5e_tc_flow_action {
+ unsigned int num_entries;
+ struct flow_action_entry **entries;
+};
+
+extern struct mlx5e_tc_act mlx5e_tc_act_drop;
+extern struct mlx5e_tc_act mlx5e_tc_act_trap;
+extern struct mlx5e_tc_act mlx5e_tc_act_accept;
+extern struct mlx5e_tc_act mlx5e_tc_act_mark;
+extern struct mlx5e_tc_act mlx5e_tc_act_goto;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap;
+extern struct mlx5e_tc_act mlx5e_tc_act_csum;
+extern struct mlx5e_tc_act mlx5e_tc_act_pedit;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic;
+extern struct mlx5e_tc_act mlx5e_tc_act_ct;
+extern struct mlx5e_tc_act mlx5e_tc_act_sample;
+extern struct mlx5e_tc_act mlx5e_tc_act_ptype;
+extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress;
+extern struct mlx5e_tc_act mlx5e_tc_act_police;
+
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack);
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder);
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type);
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr);
+
+#endif /* __MLX5_EN_TC_ACT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
new file mode 100644
index 000000000..c0f08ae6a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/tc_act/tc_csum.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+csum_offload_supported(struct mlx5e_priv *priv,
+ u32 action,
+ u32 update_flags,
+ struct netlink_ext_ack *extack)
+{
+ u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+ TCA_CSUM_UPDATE_FLAG_UDP;
+
+ /* The HW recalcs checksums only if re-writing headers */
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "TC csum action is only offloaded with pedit");
+ netdev_warn(priv->netdev,
+ "TC csum action is only offloaded with pedit\n");
+ return false;
+ }
+
+ if (update_flags & ~prot_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload TC csum action for some header/s");
+ netdev_warn(priv->netdev,
+ "can't offload TC csum action for some header/s - flags %#x\n",
+ update_flags);
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ return csum_offload_supported(flow->priv, attr->action,
+ act->csum_flags, parse_state->extack);
+}
+
+static int
+tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_csum = {
+ .can_offload = tc_act_can_offload_csum,
+ .parse_action = tc_act_parse_csum,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
new file mode 100644
index 000000000..a829c9428
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+
+static bool
+tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->ct && !clear_action) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ int err;
+
+ /* It's redundant to do ct clear more than once. */
+ if (clear_action && parse_state->ct_clear)
+ return 0;
+
+ err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
+ &attr->parse_attr->mod_hdr_acts,
+ act, parse_state->extack);
+ if (err)
+ return err;
+
+ if (mlx5e_is_eswitch_flow(parse_state->flow))
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ if (clear_action) {
+ parse_state->ct_clear = true;
+ } else {
+ attr->flags |= MLX5_ATTR_FLAG_CT;
+ flow_flag_set(parse_state->flow, CT);
+ parse_state->ct = true;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
+ int err;
+
+ /* If a ct action exists, we can ignore previous ct_clear actions */
+ if (parse_state->ct)
+ return 0;
+
+ if (parse_state->ct_clear) {
+ err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Failed to set registers for ct clear");
+ return err;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Prevent handling of additional, redundant clear actions */
+ parse_state->ct_clear = false;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->ct.action & TCA_CT_ACT_CLEAR)
+ return false;
+
+ return true;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ct = {
+ .can_offload = tc_act_can_offload_ct,
+ .parse_action = tc_act_parse_ct,
+ .is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .post_parse = tc_act_post_parse_ct,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
new file mode 100644
index 000000000..dd025a95c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_drop = {
+ .can_offload = tc_act_can_offload_drop,
+ .parse_action = tc_act_parse_drop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
new file mode 100644
index 000000000..25174f686
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "eswitch.h"
+
+static int
+validate_goto_chain(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
+ bool ft_flow = mlx5e_is_ft_flow(flow);
+ u32 dest_chain = act->chain_index;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_eswitch *esw;
+ u32 reformat_and_fwd;
+ u32 max_chain;
+
+ esw = priv->mdev->priv.eswitch;
+ chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc);
+ max_chain = mlx5_chains_get_chain_range(chains);
+ reformat_and_fwd = is_esw ?
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
+
+ if (ft_flow) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_chains_backwards_supported(chains) &&
+ dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+
+ if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ !reformat_and_fwd) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Goto chain is not allowed if action has reformat or decap");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (validate_goto_chain(flow->priv, flow, attr, act, extack))
+ return false;
+
+ return true;
+}
+
+static int
+tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->dest_chain = act->chain_index;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (!attr->dest_chain)
+ return 0;
+
+ if (parse_state->decap) {
+ /* This could be supported by creating a mapping for
+ * the tunnel device only (without the tunnel) and
+ * associating that tunnel id with this decap flow.
+ *
+ * On restore (miss), we would then simply set the
+ * saved tunnel device.
+ */
+
+ NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported");
+ netdev_warn(priv->netdev, "Decap with goto isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_goto = {
+ .can_offload = tc_act_can_offload_goto,
+ .parse_action = tc_act_parse_goto,
+ .post_parse = tc_act_post_parse_goto,
+};
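
validate_goto_chain() above rejects a goto whose destination chain is lower than the current chain (unless backwards goto is supported), is beyond the supported chain range, or is combined with reformat/decap on hardware lacking reformat_and_fwd_to_table. A minimal userspace sketch of those three checks follows; the field names are illustrative, not the driver's.

/* Userspace sketch of the goto-chain validation rules. */
#include <stdbool.h>
#include <stdio.h>

struct goto_check {
        unsigned int cur_chain;         /* chain the rule is attached to */
        unsigned int dest_chain;        /* act->chain_index analogue */
        unsigned int max_chain;         /* supported chain range */
        bool backwards_supported;       /* goto to a lower chain allowed? */
        bool has_reformat_or_decap;     /* reformat/decap already parsed */
        bool reformat_and_fwd_cap;      /* HW capability analogue */
};

static int validate_goto(const struct goto_check *c)
{
        if (!c->backwards_supported && c->dest_chain <= c->cur_chain)
                return -1;              /* goto to a lower-numbered chain */
        if (c->dest_chain > c->max_chain)
                return -1;              /* destination chain out of range */
        if (c->has_reformat_or_decap && !c->reformat_and_fwd_cap)
                return -1;              /* reformat/decap + goto needs the cap */
        return 0;
}

int main(void)
{
        struct goto_check c = {
                .cur_chain = 1, .dest_chain = 3, .max_chain = 4,
                .backwards_supported = false,
                .has_reformat_or_decap = false,
                .reformat_and_fwd_cap = true,
        };

        printf("goto valid: %s\n", validate_goto(&c) ? "no" : "yes");
        return 0;
}
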
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
new file mode 100644
index 000000000..e8d227595
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en_tc.h"
+
+static bool
+tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->nic_attr->flow_tag = act->mark;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mark = {
+ .can_offload = tc_act_can_offload_mark,
+ .parse_action = tc_act_parse_mark,
+};
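
tc_act_can_offload_mark() rejects marks with bits outside MLX5E_TC_FLOW_ID_MASK before the mark is stored as the NIC flow tag. The sketch below assumes a 16-bit mask purely because the extack message above says "only 16 bit supported"; the real constant lives in the driver headers.

/* Sketch of the flow-mark width check; 0xFFFF is an assumption, the real
 * constant is MLX5E_TC_FLOW_ID_MASK.
 */
#include <stdio.h>

#define FLOW_ID_MASK_SKETCH 0xFFFFu     /* assumed 16-bit mask */

static int mark_fits(unsigned int mark)
{
        return !(mark & ~FLOW_ID_MASK_SKETCH);
}

int main(void)
{
        printf("mark 0x1234 ok:  %d\n", mark_fits(0x1234));
        printf("mark 0x12345 ok: %d\n", mark_fits(0x12345));
        return 0;
}
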
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
new file mode 100644
index 000000000..4ac7de3f6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_macvlan.h>
+#include <linux/if_vlan.h>
+#include <net/bareudp.h>
+#include <net/bonding.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+#include "en_rep.h"
+#include "lag/lag.h"
+
+static bool
+same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ return mlx5e_eswitch_vf_rep(priv->netdev) &&
+ priv->netdev == out_dev;
+}
+
+static int
+verify_uplink_forwarding(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rep_priv;
+
+ /* Forwarding non-encapsulated traffic between
+ * uplink ports is allowed only if the
+ * termination_table_raw_traffic cap is set.
+ *
+ * The input vport was stored in attr->in_rep.
+ * In the LAG case, *priv* is the private data of
+ * the uplink, which may not be the input vport.
+ */
+ rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep);
+
+ if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+ mlx5e_eswitch_uplink_rep(out_dev)))
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+ termination_table_raw_traffic)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are both uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ } else if (out_dev != rep_priv->netdev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not the same uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static bool
+is_duplicated_output_device(struct net_device *dev,
+ struct net_device *out_dev,
+ int *ifindexes, int if_count,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < if_count; i++) {
+ if (ifindexes[i] == out_dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device");
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
+ out_dev->name);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct net_device *
+get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
+{
+ struct net_device *fdb_out_dev = out_dev;
+ struct net_device *uplink_upper;
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev) {
+ fdb_out_dev = uplink_dev;
+ } else if (netif_is_lag_master(out_dev)) {
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
+ if (fdb_out_dev &&
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
+ fdb_out_dev = NULL;
+ }
+ rcu_read_unlock();
+ return fdb_out_dev;
+}
+
+static bool
+tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev) {
+ /* out_dev is NULL when filters with a
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return false;
+ }
+
+ if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device");
+ return false;
+ }
+
+ if (parse_state->eth_pop && !parse_state->mpls_push) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push");
+ return false;
+ }
+
+ if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push");
+ return false;
+ }
+
+ if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) {
+ /* Ignore forward-to-self rules generated
+ * by adding both mlx5 devs to the flow table
+ * block on a normal nft offload setup.
+ */
+ return false;
+ }
+
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't support more output ports, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "can't support more than %d output ports, can't offload forwarding\n",
+ esw_attr->out_count);
+ return false;
+ }
+
+ if (parse_state->encap ||
+ netdev_port_same_parent_id(priv->netdev, out_dev) ||
+ netif_is_ovs_master(out_dev))
+ return true;
+
+ if (parse_attr->filter_dev != priv->netdev) {
+ /* All mlx5 devices are called to configure
+ * high-level device filters. Therefore, the
+ * *attempt* to install a filter on an invalid
+ * eswitch should not trigger an explicit error.
+ */
+ return false;
+ }
+
+ NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
+
+ return false;
+}
+
+static int
+parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+
+ parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex;
+ parse_attr->tun_info[esw_attr->out_count] =
+ mlx5e_dup_tun_info(parse_state->tun_info);
+
+ if (!parse_attr->tun_info[esw_attr->out_count])
+ return -ENOMEM;
+
+ parse_state->encap = false;
+
+ if (parse_state->mpls_push) {
+ memcpy(&parse_attr->mpls_info[esw_attr->out_count],
+ &parse_state->mpls_info, sizeof(parse_state->mpls_info));
+ parse_state->mpls_push = false;
+ }
+ esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP;
+ esw_attr->out_count++;
+ /* attr->dests[].rep is resolved when we handle encap */
+
+ return 0;
+}
+
+static int
+parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct net_device *out_dev = act->dev;
+ struct net_device *uplink_dev;
+ struct mlx5e_priv *out_priv;
+ struct mlx5_eswitch *esw;
+ bool is_uplink_rep;
+ int *ifindexes;
+ int if_count;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ ifindexes = parse_state->ifindexes;
+ if_count = parse_state->if_count;
+
+ if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack))
+ return -EOPNOTSUPP;
+
+ parse_state->ifindexes[if_count] = out_dev->ifindex;
+ parse_state->if_count++;
+ is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
+ err = mlx5_lag_do_mirred(priv->mdev, out_dev);
+ if (err)
+ return err;
+
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
+ if (!out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(out_dev)) {
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack);
+ if (err)
+ return err;
+ }
+
+ if (is_vlan_dev(parse_attr->filter_dev)) {
+ err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack);
+ if (err)
+ return err;
+ }
+
+ if (netif_is_macvlan(out_dev))
+ out_dev = macvlan_dev_real_dev(out_dev);
+
+ err = verify_uplink_forwarding(priv, attr, out_dev, extack);
+ if (err)
+ return err;
+
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+
+ if (same_vf_reps(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself");
+ return -EOPNOTSUPP;
+ }
+
+ out_priv = netdev_priv(out_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
+
+ /* If the output device is a bond master then the rules are not
+ * explicit, so we don't attempt to count them.
+ */
+ if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ attr->lag.count = true;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+static int
+parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+ return 0;
+}
+
+static int
+tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct net_device *out_dev = act->dev;
+ int err = -EOPNOTSUPP;
+
+ if (parse_state->encap)
+ err = parse_mirred_encap(parse_state, act, attr);
+ else if (netdev_port_same_parent_id(priv->netdev, out_dev))
+ err = parse_mirred(parse_state, act, priv, attr);
+ else if (netif_is_ovs_master(out_dev))
+ err = parse_mirred_ovs_master(parse_state, act, priv, attr);
+
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred = {
+ .can_offload = tc_act_can_offload_mirred,
+ .parse_action = tc_act_parse_mirred,
+};
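
is_duplicated_output_device() walks the ifindexes recorded so far in parse_state and rejects a mirred destination whose ifindex was already used, so a flow cannot output twice to the same device. A compact sketch of that scan, with illustrative types:

/* Sketch of the duplicate-output scan: every accepted mirred destination
 * records its ifindex; a later destination with the same ifindex is rejected.
 */
#include <stdbool.h>
#include <stdio.h>

static bool is_duplicated_output(const int *ifindexes, int if_count, int new_ifindex)
{
        for (int i = 0; i < if_count; i++)
                if (ifindexes[i] == new_ifindex)
                        return true;
        return false;
}

int main(void)
{
        int ifindexes[4] = { 0 };
        int if_count = 0;

        ifindexes[if_count++] = 5;      /* first mirred destination */
        printf("dup(5): %d\n", is_duplicated_output(ifindexes, if_count, 5));
        printf("dup(7): %d\n", is_duplicated_output(ifindexes, if_count, 7));
        return 0;
}
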
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
new file mode 100644
index 000000000..90b4c1b34
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+
+ if (act->id != FLOW_ACTION_REDIRECT)
+ return false;
+
+ if (priv->netdev->netdev_ops != out_dev->netdev_ops ||
+ !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "devices %s %s not on same switch HW, can't offload forwarding\n",
+ netdev_name(priv->netdev),
+ out_dev->name);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex;
+ flow_flag_set(parse_state->flow, HAIRPIN);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = {
+ .can_offload = tc_act_can_offload_mirred_nic,
+ .parse_action = tc_act_parse_mirred_nic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
new file mode 100644
index 000000000..f106190bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/bareudp.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_priv *priv = parse_state->flow->priv;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) ||
+ act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol");
+ return false;
+ }
+
+ return true;
+}
+
+static void
+copy_mpls_info(struct mlx5e_mpls_info *mpls_info,
+ const struct flow_action_entry *act)
+{
+ mpls_info->label = act->mpls_push.label;
+ mpls_info->tc = act->mpls_push.tc;
+ mpls_info->bos = act->mpls_push.bos;
+ mpls_info->ttl = act->mpls_push.ttl;
+}
+
+static int
+tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->mpls_push = true;
+ copy_mpls_info(&parse_state->mpls_info, act);
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct net_device *filter_dev;
+
+ filter_dev = attr->parse_attr->filter_dev;
+
+ /* We only support mpls pop if it is the first action,
+ * or the second action after a tunnel key unset,
+ * and the filter net device is bareudp. Subsequent
+ * actions can be pedit and the last can be mirred
+ * egress redirect.
+ */
+ if ((act_index == 1 && !parse_state->decap) || act_index > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap");
+ return false;
+ }
+
+ if (!netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->esw_attr->eth.h_proto = act->mpls_pop.proto;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_flag_set(parse_state->flow, L3_TO_L2_DECAP);
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_push = {
+ .can_offload = tc_act_can_offload_mpls_push,
+ .parse_action = tc_act_parse_mpls_push,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = {
+ .can_offload = tc_act_can_offload_mpls_pop,
+ .parse_action = tc_act_parse_mpls_pop,
+};
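
tc_act_can_offload_mpls_pop() only accepts the pop as the first action, or as the second action when it immediately follows a tunnel decap, on a bareudp filter device. A sketch of that placement rule, assuming the 0-based action index used by the flow_action walk:

/* Sketch of the mpls-pop placement rule. */
#include <stdbool.h>
#include <stdio.h>

static bool mpls_pop_position_ok(int act_index, bool decap_seen)
{
        if (act_index == 0)
                return true;                    /* first action */
        if (act_index == 1 && decap_seen)
                return true;                    /* second, right after decap */
        return false;                           /* anywhere later: rejected */
}

int main(void)
{
        printf("%d %d %d\n",
               mpls_pop_position_ok(0, false),  /* 1 */
               mpls_pop_position_ok(1, true),   /* 1 */
               mpls_pop_position_ok(2, true));  /* 0 */
        return 0;
}
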
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
new file mode 100644
index 000000000..47597c524
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "pedit.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+
+static int pedit_header_offsets[] = {
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int
+set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u32 *curr_pmask, *curr_pval;
+
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
+
+ if (*curr_pmask & mask) { /* disallow acting twice on the same location */
+ NL_SET_ERR_MSG_MOD(extack,
+ "curr_pmask and new mask same. Acting twice on same location");
+ goto out_err;
+ }
+
+ *curr_pmask |= mask;
+ *curr_pval |= (val & mask);
+
+ return 0;
+
+out_err:
+ return -EOPNOTSUPP;
+}
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
+ u8 htype = act->mangle.htype;
+ int err = -EOPNOTSUPP;
+ u32 mask, val, offset;
+
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
+ goto out_err;
+ }
+
+ if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) {
+ NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported");
+ goto out_err;
+ }
+
+ mask = act->mangle.mask;
+ val = act->mangle.val;
+ offset = act->mangle.offset;
+
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack);
+ if (err)
+ goto out_err;
+
+ hdrs[cmd].pedits++;
+
+ return 0;
+out_err:
+ return err;
+}
+
+static bool
+tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(flow);
+
+ err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs,
+ parse_state->extack);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_pedit = {
+ .can_offload = tc_act_can_offload_pedit,
+ .parse_action = tc_act_parse_pedit,
+};
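
set_pedit_val() accumulates, per header type, which bits a flow rewrites and to what value, refusing a second mangle that overlaps bits already claimed. Note that the caller passes ~act->mangle.mask, i.e. the bits to change. The standalone sketch below models only that accumulation step.

/* Sketch of the per-header pedit accumulation; the kernel inverts
 * act->mangle.mask before calling, so change_bits are the bits to rewrite.
 */
#include <stdint.h>
#include <stdio.h>

static int accumulate_pedit(uint32_t *acc_mask, uint32_t *acc_val,
                            uint32_t change_bits, uint32_t val)
{
        if (*acc_mask & change_bits)
                return -1;              /* acting twice on the same location */

        *acc_mask |= change_bits;
        *acc_val |= (val & change_bits);
        return 0;
}

int main(void)
{
        uint32_t mask = 0, val = 0;

        printf("%d\n", accumulate_pedit(&mask, &val, 0x0000ff00, 0x00001200)); /*  0 */
        printf("%d\n", accumulate_pedit(&mask, &val, 0x0000f000, 0x00003000)); /* -1 */
        printf("mask 0x%08x val 0x%08x\n", (unsigned)mask, (unsigned)val);
        return 0;
}
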
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
new file mode 100644
index 000000000..434c8bd71
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_PEDIT_H__
+#define __MLX5_EN_TC_ACT_PEDIT_H__
+
+#include "en_tc.h"
+
+struct pedit_headers {
+ struct ethhdr eth;
+ struct vlan_hdr vlan;
+ struct iphdr ip4;
+ struct ipv6hdr ip6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+};
+
+struct pedit_headers_action {
+ struct pedit_headers vals;
+ struct pedit_headers masks;
+ u32 pedits;
+};
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
new file mode 100644
index 000000000..c8e5ca65b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Offload not supported when conform action is not pipe or ok");
+ return false;
+ }
+ if (mlx5e_policer_validate(parse_state->flow_action, act,
+ parse_state->extack))
+ return false;
+
+ return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev);
+}
+
+static int
+fill_meter_params_from_act(const struct flow_action_entry *act,
+ struct mlx5e_flow_meter_params *params)
+{
+ params->index = act->hw_index;
+ if (act->police.rate_bytes_ps) {
+ params->mode = MLX5_RATE_LIMIT_BPS;
+ /* change rate to bits per second */
+ params->rate = act->police.rate_bytes_ps << 3;
+ params->burst = act->police.burst;
+ } else if (act->police.rate_pkt_ps) {
+ params->mode = MLX5_RATE_LIMIT_PPS;
+ params->rate = act->police.rate_pkt_ps;
+ params->burst = act->police.burst_pkt;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ int err;
+
+ err = fill_meter_params_from_act(act, &attr->meter_attr.params);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
+ attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_police(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_police_offload(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ int err = 0;
+
+ err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ if (err)
+ return err;
+
+ err = fill_meter_params_from_act(act, &params);
+ if (err)
+ return err;
+
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) {
+ meter = mlx5e_tc_meter_replace(priv->mdev, &params);
+ } else if (!IS_ERR(meter)) {
+ err = mlx5e_tc_meter_update(meter, &params);
+ mlx5e_tc_meter_put(meter);
+ }
+
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ err = PTR_ERR(meter);
+ }
+
+ return err;
+}
+
+static int
+tc_act_police_destroy(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+ /* first put for the get and second for cleanup */
+ mlx5e_tc_meter_put(meter);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+static int
+tc_act_police_stats(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ u64 bytes, packets, drops, lastuse;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+
+ mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse);
+ flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_police = {
+ .can_offload = tc_act_can_offload_police,
+ .parse_action = tc_act_parse_police,
+ .is_multi_table_act = tc_act_is_multi_table_act_police,
+ .offload_action = tc_act_police_offload,
+ .destroy_action = tc_act_police_destroy,
+ .stats_action = tc_act_police_stats,
+};
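
fill_meter_params_from_act() distinguishes byte-rate from packet-rate policers and converts the former from bytes per second to bits per second with a left shift by 3. A standalone sketch of that conversion; the field names are illustrative.

/* Sketch of the policer-to-meter conversion: a byte-rate policer becomes a
 * bits-per-second meter (<< 3), a packet-rate policer is taken as-is.
 */
#include <stdint.h>
#include <stdio.h>

enum rate_mode { RATE_LIMIT_BPS, RATE_LIMIT_PPS };

struct meter_params {
        enum rate_mode mode;
        uint64_t rate;
        uint64_t burst;
};

static int fill_meter_params(uint64_t rate_bytes_ps, uint64_t burst,
                             uint64_t rate_pkt_ps, uint64_t burst_pkt,
                             struct meter_params *p)
{
        if (rate_bytes_ps) {
                p->mode = RATE_LIMIT_BPS;
                p->rate = rate_bytes_ps << 3;   /* bytes/s -> bits/s */
                p->burst = burst;
        } else if (rate_pkt_ps) {
                p->mode = RATE_LIMIT_PPS;
                p->rate = rate_pkt_ps;
                p->burst = burst_pkt;
        } else {
                return -1;                      /* neither rate was given */
        }
        return 0;
}

int main(void)
{
        struct meter_params p;

        if (!fill_meter_params(125000, 1500, 0, 0, &p))
                printf("mode %d, rate %llu bit/s\n",
                       p.mode, (unsigned long long)p.rate);
        return 0;
}
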
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
new file mode 100644
index 000000000..6454b031f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (act->ptype != PACKET_HOST) {
+ NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host");
+ return -EOPNOTSUPP;
+ }
+
+ parse_state->ptype_host = true;
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ptype = {
+ .can_offload = tc_act_can_offload_ptype,
+ .parse_action = tc_act_parse_ptype,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
new file mode 100644
index 000000000..ad09a8a5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev)
+ return false;
+
+ if (!netif_is_ovs_master(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is supported only for OVS internal ports");
+ return false;
+ }
+
+ if (netif_is_ovs_master(parse_attr->filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is not supported from internal port");
+ return false;
+ }
+
+ if (!parse_state->ptype_host) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress requires ptype=host action");
+ return false;
+ }
+
+ if (esw_attr->out_count) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress is supported only as single destination");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = {
+ .can_offload = tc_act_can_offload_redirect_ingress,
+ .parse_action = tc_act_parse_redirect_ingress,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
new file mode 100644
index 000000000..2c0196431
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/psample.h>
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc/act/sample.h"
+
+static bool
+tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ bool ct_nat;
+
+ ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+
+ if (flow_flag_test(parse_state->flow, CT) && ct_nat) {
+ NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_attr *sample_attr = &attr->sample_attr;
+
+ sample_attr->rate = act->sample.rate;
+ sample_attr->group_num = act->sample.psample_group->group_num;
+
+ if (act->sample.truncate)
+ sample_attr->trunc_size = act->sample.trunc_size;
+
+ attr->flags |= MLX5_ATTR_FLAG_SAMPLE;
+ flow_flag_set(parse_state->flow, SAMPLE);
+
+ return 0;
+}
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr)
+{
+ if (MLX5_CAP_GEN(mdev, reg_c_preserve) ||
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return true;
+
+ return false;
+}
+
+static bool
+tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr);
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_sample = {
+ .can_offload = tc_act_can_offload_sample,
+ .parse_action = tc_act_parse_sample,
+ .is_multi_table_act = tc_act_is_multi_table_act_sample,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
new file mode 100644
index 000000000..3efb3a15c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__
+#define __MLX5_EN_TC_ACT_SAMPLE_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr);
+
+#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
new file mode 100644
index 000000000..53b270f65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->flow_action->num_entries != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_trap = {
+ .can_offload = tc_act_can_offload_trap,
+ .parse_action = tc_act_parse_trap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
new file mode 100644
index 000000000..b4fa2de97
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (!act->tunnel) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Zero tunnel attributes is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->tun_info = act->tunnel;
+ parse_state->encap = true;
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->decap = true;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_encap = {
+ .can_offload = tc_act_can_offload_tun_encap,
+ .parse_action = tc_act_parse_tun_encap,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_decap = {
+ .can_offload = tc_act_can_offload_tun_decap,
+ .parse_action = tc_act_parse_tun_decap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
new file mode 100644
index 000000000..b86ac604d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+static int
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ const struct flow_action_entry prio_tag_act = {
+ .vlan.vid = 0,
+ .vlan.prio =
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_value(*action,
+ &parse_attr->spec),
+ first_prio) &
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_criteria(*action,
+ &parse_attr->spec),
+ first_prio),
+ };
+
+ return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+ &prio_tag_act, parse_attr, action,
+ extack);
+}
+
+static int
+parse_tc_vlan_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action,
+ struct netlink_ext_ack *extack,
+ struct mlx5e_tc_act_parse_state *parse_state)
+{
+ u8 vlan_idx = attr->total_vlan;
+
+ if (vlan_idx >= MLX5_FS_VLAN_DEPTH) {
+ NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported");
+ return -EOPNOTSUPP;
+ }
+
+ switch (act->id) {
+ case FLOW_ACTION_VLAN_POP:
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+ } else {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ }
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
+ if (!attr->vlan_proto[vlan_idx])
+ attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vlan push action is not supported for vlan depth > 1");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ } else {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
+ act->vlan.prio)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ }
+ break;
+ case FLOW_ACTION_VLAN_POP_ETH:
+ parse_state->eth_pop = true;
+ break;
+ case FLOW_ACTION_VLAN_PUSH_ETH:
+ if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP))
+ return -EOPNOTSUPP;
+ parse_state->eth_push = true;
+ memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN);
+ memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN");
+ return -EINVAL;
+ }
+
+ attr->total_vlan = vlan_idx + 1;
+
+ return 0;
+}
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *vlan_dev = *out_dev;
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_PUSH,
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+ .vlan.prio = 0,
+ };
+ int err;
+
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL);
+ if (err)
+ return err;
+
+ rcu_read_lock();
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
+ rcu_read_unlock();
+ if (!*out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(*out_dev))
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack);
+
+ return err;
+}
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_POP,
+ };
+ int nest_level, err = 0;
+
+ nest_level = attr->parse_attr->filter_dev->lower_level -
+ priv->netdev->lower_level;
+ while (nest_level--) {
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action,
+ extack, NULL);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int err;
+
+ if (act->id == FLOW_ACTION_VLAN_PUSH &&
+ (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
+ /* Replace vlan pop+push with vlan modify */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act,
+ attr->parse_attr, &attr->action,
+ parse_state->extack);
+ } else {
+ err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action,
+ parse_state->extack, parse_state);
+ }
+
+ if (err)
+ return err;
+
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+ /* For prio tag mode, replace the vlan pop with a vlan prio
+ * tag rewrite.
+ */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr,
+ &attr->action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan = {
+ .can_offload = tc_act_can_offload_vlan,
+ .parse_action = tc_act_parse_vlan,
+ .post_parse = tc_act_post_parse_vlan,
+};
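
tc_act_parse_vlan() collapses a vlan pop followed by a vlan push into a single header rewrite: the pending POP flag is cleared and a modify-header action takes its place. The simplified sketch below shows that state transition; the flag names are illustrative and the VID-to-pedit translation itself is handled in vlan_mangle.c.

/* Simplified sketch of the pop+push collapse into a tag rewrite. */
#include <stdbool.h>
#include <stdio.h>

#define ACT_VLAN_POP    (1u << 0)
#define ACT_VLAN_PUSH   (1u << 1)
#define ACT_MOD_HDR     (1u << 2)

static bool parse_vlan_push(unsigned int *action, unsigned short vid,
                            unsigned short *rewrite_vid)
{
        if (*action & ACT_VLAN_POP) {
                *action &= ~ACT_VLAN_POP;       /* drop the pending pop */
                *action |= ACT_MOD_HDR;         /* rewrite the existing tag */
                *rewrite_vid = vid;
                return true;
        }
        *action |= ACT_VLAN_PUSH;               /* plain push otherwise */
        return false;
}

int main(void)
{
        unsigned int action = ACT_VLAN_POP;     /* a pop was parsed earlier */
        unsigned short vid = 0;
        bool rewrote = parse_vlan_push(&action, 100, &vid);

        printf("rewrote=%d vid=%u action=0x%x\n", rewrote, vid, action);
        return 0;
}
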
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
new file mode 100644
index 000000000..2fa58c6f4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_VLAN_H__
+#define __MLX5_EN_TC_ACT_VLAN_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
new file mode 100644
index 000000000..9a8a1a6bd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ u16 mask16 = VLAN_VID_MASK;
+ u16 val16 = act->vlan.vid & VLAN_VID_MASK;
+ const struct flow_action_entry pedit_act = {
+ .id = FLOW_ACTION_MANGLE,
+ .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
+ .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
+ .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
+ .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
+ };
+ u8 match_prio_mask, match_prio_val;
+ void *headers_c, *headers_v;
+ int err;
+
+ headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec);
+
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match");
+ return -EOPNOTSUPP;
+ }
+
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs,
+ extack);
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(parse_state->flow);
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr,
+ &attr->action, parse_state->extack);
+ if (err)
+ return err;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = {
+ .can_offload = tc_act_can_offload_vlan_mangle,
+ .parse_action = tc_act_parse_vlan_mangle,
+};
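
mlx5e_tc_act_vlan_add_rewrite_action() expresses the VID change as an Ethernet-header mangle over h_vlan_TCI, touching only the 12 VID bits and leaving the priority bits as matched (which is why a differing priority is rejected above). The sketch below shows the effect on a TCI word; the byte-order handling and the pedit keep-mask convention of the real code are omitted.

/* Sketch of the VID rewrite on a TCI word: only the VID bits change. */
#include <stdint.h>
#include <stdio.h>

#define VID_MASK 0x0fffu        /* VLAN_VID_MASK analogue */

int main(void)
{
        uint16_t new_vid = 100;
        uint16_t old_tci = 0x2005;      /* prio 1, vid 5 */
        uint16_t new_tci = (old_tci & (uint16_t)~VID_MASK) | (new_vid & VID_MASK);

        printf("tci 0x%04x -> 0x%04x\n", (unsigned)old_tci, (unsigned)new_tci);
        return 0;
}
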
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
new file mode 100644
index 000000000..bb6b1a979
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_TC_CT_FS_H__
+#define __MLX5_EN_TC_CT_FS_H__
+
+struct mlx5_ct_fs {
+ const struct net_device *netdev;
+ struct mlx5_core_dev *dev;
+
+ /* private data */
+ void *priv_data[];
+};
+
+struct mlx5_ct_fs_rule {
+};
+
+struct mlx5_ct_fs_ops {
+ int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct);
+ void (*destroy)(struct mlx5_ct_fs *fs);
+
+ struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule);
+ void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule);
+
+ size_t priv_size;
+};
+
+static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs)
+{
+ return &fs->priv_data;
+}
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void);
+
+#if IS_ENABLED(CONFIG_MLX5_SW_STEERING)
+struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void);
+#else
+static inline struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return NULL;
+}
+#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */
+
+#endif /* __MLX5_EN_TC_CT_FS_H__ */
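
struct mlx5_ct_fs ends in a flexible priv_data[] area and each backend advertises a priv_size in its ops, with mlx5_ct_fs_priv() returning the trailing storage. Presumably the core allocates the object with priv_size extra bytes; the allocation itself is outside this hunk, so the userspace sketch below only illustrates the pattern with made-up names.

/* Userspace sketch of the trailing private-data pattern. */
#include <stdio.h>
#include <stdlib.h>

struct ct_fs {
        const char *name;
        char priv_data[];               /* flexible array member */
};

struct smfs_priv {
        int matcher_count;              /* backend-private state */
};

static void *ct_fs_priv(struct ct_fs *fs)
{
        return fs->priv_data;
}

int main(void)
{
        struct ct_fs *fs = calloc(1, sizeof(*fs) + sizeof(struct smfs_priv));
        struct smfs_priv *priv;

        if (!fs)
                return 1;
        fs->name = "smfs";
        priv = ct_fs_priv(fs);
        priv->matcher_count = 3;
        printf("%s backend, %d matchers\n", fs->name, priv->matcher_count);
        free(fs);
        return 0;
}
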
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
new file mode 100644
index 000000000..ae4f55be4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "en_tc.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args)
+
+struct mlx5_ct_fs_dmfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+};
+
+static int
+mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ return 0;
+}
+
+static void
+mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs)
+{
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5e_priv *priv = netdev_priv(fs->netdev);
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule;
+ int err;
+
+ dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL);
+ if (!dmfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
+ if (IS_ERR(dmfs_rule->rule)) {
+ err = PTR_ERR(dmfs_rule->rule);
+ ct_dbg("Failed to add ct entry fs rule");
+ goto err_insert;
+ }
+
+ dmfs_rule->attr = attr;
+
+ return &dmfs_rule->fs_rule;
+
+err_insert:
+ kfree(dmfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_dmfs_rule,
+ fs_rule);
+
+ mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr);
+ kfree(dmfs_rule);
+}
+
+static struct mlx5_ct_fs_ops dmfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_dmfs_init,
+ .destroy = mlx5_ct_fs_dmfs_destroy,
+};
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void)
+{
+ return &dmfs_ops;
+}
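
The dmfs backend embeds the generic struct mlx5_ct_fs_rule inside its own mlx5_ct_fs_dmfs_rule and recovers the wrapper with container_of() in ct_rule_del(). The sketch below reproduces that embedding pattern with a trimmed-down container_of; the kernel macro additionally type-checks the member.

/* Sketch of the rule-embedding pattern used by the backends. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct fs_rule { char unused; };        /* generic handle; placeholder member
                                         * keeps the sketch standard C */

struct dmfs_rule {
        struct fs_rule fs_rule;         /* embedded generic part */
        int hw_handle;                  /* backend-private state */
};

int main(void)
{
        struct dmfs_rule rule = { .hw_handle = 42 };
        struct fs_rule *generic = &rule.fs_rule;        /* what callers hold */
        struct dmfs_rule *back = container_of(generic, struct dmfs_rule, fs_rule);

        printf("hw_handle = %d\n", back->hw_handle);
        return 0;
}
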
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
new file mode 100644
index 000000000..2b80fe735
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/refcount.h>
+
+#include "en_tc.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#include "lib/smfs.h"
+
+#define INIT_ERR_PREFIX "ct_fs_smfs init failed"
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args)
+#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16)
+
+struct mlx5_ct_fs_smfs_matcher {
+ struct mlx5dr_matcher *dr_matcher;
+ struct list_head list;
+ int prio;
+ refcount_t ref;
+};
+
+struct mlx5_ct_fs_smfs_matchers {
+ struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
+ struct list_head used;
+};
+
+struct mlx5_ct_fs_smfs {
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl;
+ struct mlx5_ct_fs_smfs_matchers matchers;
+ struct mlx5_ct_fs_smfs_matchers matchers_nat;
+ struct mlx5dr_action *fwd_action;
+ struct mlx5_flow_table *ct_nat;
+ struct mutex lock; /* Guards matchers */
+};
+
+struct mlx5_ct_fs_smfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5dr_rule *rule;
+ struct mlx5dr_action *count_action;
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+};
+
+static inline void
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+ bool gre)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+
+ if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version)))
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ if (likely(ipv4)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6));
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6));
+ }
+
+ if (likely(tcp)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(MLX5_CT_TCP_FLAGS_MASK));
+ } else if (!gre) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
+ }
+
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK);
+}
+
+static struct mlx5dr_matcher *
+mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
+ bool tcp, bool gre, u32 priority)
+{
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
+
+ dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
+ kvfree(spec);
+ if (!dr_matcher)
+ return ERR_PTR(-EINVAL);
+
+ return dr_matcher;
+}
+
+static struct mlx5_ct_fs_smfs_matcher *
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
+ struct mlx5_ct_fs_smfs_matchers *matchers;
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5dr_table *tbl;
+ struct list_head *prev;
+ int prio;
+
+ matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
+ smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
+
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ return smfs_matcher;
+
+ mutex_lock(&fs_smfs->lock);
+
+ /* Retry under the lock, as another thread might have already created
+ * the relevant matcher before we acquired it.
+ */
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ goto out_unlock;
+
+ /* Find next available priority in sorted used list */
+ prio = 0;
+ prev = &matchers->used;
+ list_for_each_entry(m, &matchers->used, list) {
+ prev = &m->list;
+
+ if (m->prio == prio)
+ prio = m->prio + 1;
+ else
+ break;
+ }
+
+ tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
+ dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
+ if (IS_ERR(dr_matcher)) {
+ netdev_warn(fs->netdev,
+ "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+ nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
+
+ smfs_matcher = ERR_CAST(dr_matcher);
+ goto out_unlock;
+ }
+
+ smfs_matcher->dr_matcher = dr_matcher;
+ smfs_matcher->prio = prio;
+ list_add(&smfs_matcher->list, prev);
+ refcount_set(&smfs_matcher->ref, 1);
+
+out_unlock:
+ mutex_unlock(&fs_smfs->lock);
+ return smfs_matcher;
+}
+
+static void
+mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock))
+ return;
+
+ mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher);
+ list_del(&smfs_matcher->list);
+ mutex_unlock(&fs_smfs->lock);
+}
+
+static int
+mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl;
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct);
+ ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat);
+ ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct);
+ fs_smfs->ct_nat = ct_nat;
+
+ if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) {
+ netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables");
+ return -EOPNOTSUPP;
+ }
+
+ ct_dbg("using smfs steering");
+
+ fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl);
+ if (!fs_smfs->fwd_action)
+ return -EINVAL;
+
+ fs_smfs->ct_tbl = ct_tbl;
+ fs_smfs->ct_nat_tbl = ct_nat_tbl;
+ mutex_init(&fs_smfs->lock);
+ INIT_LIST_HEAD(&fs_smfs->matchers.used);
+ INIT_LIST_HEAD(&fs_smfs->matchers_nat.used);
+
+ return 0;
+}
+
+static void
+mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ mlx5_smfs_action_destroy(fs_smfs->fwd_action);
+}
+
+static inline bool
+mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
+{
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+ const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+ const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+ const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
+
+ return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
+ used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
+}
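+/* For example, an offloaded IPv4/TCP conntrack tuple is expected to use
+ * exactly BASIC | CONTROL | META | IPV4_ADDRS | PORTS | TCP; any other
+ * combination of dissector keys is rejected by the validation below.
+ */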
+
+static bool
+mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule)
+{
+ struct flow_match_ipv4_addrs ipv4_addrs;
+ struct flow_match_ipv6_addrs ipv6_addrs;
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+ struct flow_match_ports ports;
+ struct flow_match_tcp tcp;
+
+ if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) {
+ ct_dbg("rule uses unexpected dissectors (0x%08x)",
+ flow_rule->match.dissector->used_keys);
+ return false;
+ }
+
+ flow_rule_match_basic(flow_rule, &basic);
+ flow_rule_match_control(flow_rule, &control);
+ flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
+ flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
+ if (basic.key->ip_proto != IPPROTO_GRE)
+ flow_rule_match_ports(flow_rule, &ports);
+ if (basic.key->ip_proto == IPPROTO_TCP)
+ flow_rule_match_tcp(flow_rule, &tcp);
+
+ if (basic.mask->n_proto != htons(0xFFFF) ||
+ (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
+ basic.mask->ip_proto != 0xFF ||
+ (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+ basic.key->ip_proto != IPPROTO_GRE)) {
+ ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
+ ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
+ basic.key->ip_proto, basic.mask->ip_proto);
+ return false;
+ }
+
+ if (basic.key->ip_proto != IPPROTO_GRE &&
+ (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
+ ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
+ ports.mask->src, ports.mask->dst);
+ return false;
+ }
+
+ if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) {
+ ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags);
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+ struct mlx5_ct_fs_smfs_rule *smfs_rule;
+ struct mlx5dr_action *actions[5];
+ struct mlx5dr_rule *rule;
+ int num_actions = 0, err;
+ bool nat, tcp, ipv4, gre;
+
+ if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL);
+ if (!smfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter));
+ if (!smfs_rule->count_action) {
+ err = -EINVAL;
+ goto err_count;
+ }
+
+ actions[num_actions++] = smfs_rule->count_action;
+ actions[num_actions++] = attr->modify_hdr->action.dr_action;
+ actions[num_actions++] = fs_smfs->fwd_action;
+
+ nat = (attr->ft == fs_smfs->ct_nat);
+ ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
+ tcp = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_TCP;
+ gre = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_GRE;
+
+ smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
+ if (IS_ERR(smfs_matcher)) {
+ err = PTR_ERR(smfs_matcher);
+ goto err_matcher;
+ }
+
+ rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
+ spec->flow_context.flow_source);
+ if (!rule) {
+ err = -EINVAL;
+ goto err_create;
+ }
+
+ smfs_rule->rule = rule;
+ smfs_rule->smfs_matcher = smfs_matcher;
+
+ return &smfs_rule->fs_rule;
+
+err_create:
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher);
+err_matcher:
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+err_count:
+ kfree(smfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_smfs_rule,
+ fs_rule);
+
+ mlx5_smfs_rule_destroy(smfs_rule->rule);
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher);
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+ kfree(smfs_rule);
+}
+
+static struct mlx5_ct_fs_ops fs_smfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_smfs_init,
+ .destroy = mlx5_ct_fs_smfs_destroy,
+
+ .priv_size = sizeof(struct mlx5_ct_fs_smfs),
+};
+
+struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return &fs_smfs_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
new file mode 100644
index 000000000..ca834bbcb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/fs.h>
+#include "en/mapping.h"
+#include "en/tc/int_port.h"
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_int_port {
+ enum mlx5e_tc_int_port_type type;
+ int ifindex;
+ u32 match_metadata;
+ u32 mapping;
+ struct list_head list;
+ struct mlx5_flow_handle *rx_rule;
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+};
+
+struct mlx5e_tc_int_port_priv {
+ struct mlx5_core_dev *dev;
+ struct mutex int_ports_lock; /* Protects int ports list */
+ struct list_head int_ports; /* Uses int_ports_lock */
+ u16 num_ports;
+ bool ul_rep_rx_ready; /* Set when the uplink rep rx path is ready; cleared during teardown */
+ struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */
+};
+
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_GEN(esw->dev, reg_c_preserve);
+}
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port)
+{
+ return int_port->match_metadata;
+}
+
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+ /* For egress forwarding we can have two cases:
+ * the packet came from a vport and was redirected to the internal port,
+ * or it came from the uplink, went via the internal port and was
+ * hairpinned back to the uplink. In both cases set the source to any port.
+ */
+ return int_port->type == MLX5E_TC_INT_PORT_EGRESS ?
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT :
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
+
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+ return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
+}
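+/* The mapped metadata occupies the low source-port bits; shifting it by
+ * (32 - ESW_SOURCE_PORT_METADATA_BITS) aligns it with the source-port field
+ * in the upper bits of reg_c_0, where the rx rule below matches it against
+ * the eswitch vport metadata mask.
+ */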
+
+static struct mlx5_flow_handle *
+mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_int_port *int_port,
+ struct mlx5_flow_destination *dest)
+
+{
+ struct mlx5_flow_context *flow_context;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5e_tc_int_port_get_metadata_for_match(int_port));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ /* Overwrite flow tag with the int port metadata mapping
+ * instead of the chain mapping.
+ */
+ flow_context = &spec->flow_context;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = int_port->mapping;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+ &flow_act, dest, 1);
+ if (IS_ERR(flow_rule))
+ mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n",
+ PTR_ERR(flow_rule));
+
+ kvfree(spec);
+
+ return flow_rule;
+}
+
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ if (!priv->ul_rep_rx_ready)
+ goto not_found;
+
+ list_for_each_entry(int_port, &priv->int_ports, list)
+ if (int_port->ifindex == ifindex && int_port->type == type) {
+ refcount_inc(&int_port->refcnt);
+ return int_port;
+ }
+
+not_found:
+ return NULL;
+}
+
+static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex, enum mlx5e_tc_int_port_type type,
+ u32 *id)
+{
+ u32 mapped_key[2] = {type, ifindex};
+ int err;
+
+ err = mapping_add(priv->metadata_mapping, mapped_key, id);
+ if (err)
+ return err;
+
+ /* Fill upper 4 bits of PFNUM with reserved value */
+ *id |= 0xf << ESW_VPORT_BITS;
+
+ return 0;
+}
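+/* The resulting metadata is therefore <0xf:4><mapping id:ESW_VPORT_BITS>:
+ * the mapping id fills the vport bits while the PF-number nibble is forced
+ * to the reserved value 0xf, which should keep internal ports distinct from
+ * real vport metadata. mlx5e_int_port_metadata_free() masks that nibble off
+ * again to recover the mapping id.
+ */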
+
+static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv,
+ u32 id)
+{
+ id &= (1 << ESW_VPORT_BITS) - 1;
+ mapping_remove(priv->metadata_mapping, id);
+}
+
+/* Must be called with priv->int_ports_lock held */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+ struct mlx5_mapped_obj mapped_obj = {};
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_int_port *int_port;
+ struct mlx5_flow_destination dest;
+ struct mapping_ctx *ctx;
+ u32 match_metadata;
+ u32 mapping;
+ int err;
+
+ if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) {
+ mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d",
+ MLX5E_TC_MAX_INT_PORT_NUM);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ int_port = kzalloc(sizeof(*int_port), GFP_KERNEL);
+ if (!int_port)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata);
+ if (err) {
+ mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d",
+ ifindex);
+ goto err_metadata;
+ }
+
+ /* map metadata to reg_c0 object for miss handling */
+ ctx = esw->offloads.reg_c0_obj_pool;
+ mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA;
+ mapped_obj.int_port_metadata = match_metadata;
+ err = mapping_add(ctx, &mapped_obj, &mapping);
+ if (err)
+ goto err_map;
+
+ int_port->type = type;
+ int_port->ifindex = ifindex;
+ int_port->match_metadata = match_metadata;
+ int_port->mapping = mapping;
+
+ /* Create a match on internal vport metadata in vport table */
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = uplink_rpriv->root_ft;
+
+ int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest);
+ if (IS_ERR(int_port->rx_rule)) {
+ err = PTR_ERR(int_port->rx_rule);
+ mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err);
+ goto err_rx_rule;
+ }
+
+ refcount_set(&int_port->refcnt, 1);
+ list_add_rcu(&int_port->list, &priv->int_ports);
+ priv->num_ports++;
+
+ return int_port;
+
+err_rx_rule:
+ mapping_remove(ctx, int_port->mapping);
+
+err_map:
+ mlx5e_int_port_metadata_free(priv, match_metadata);
+
+err_metadata:
+ kfree(int_port);
+
+ return ERR_PTR(err);
+}
+
+/* Must be called with priv->int_ports_lock held */
+static void
+mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port)
+{
+ struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+ struct mapping_ctx *ctx;
+
+ ctx = esw->offloads.reg_c0_obj_pool;
+
+ list_del_rcu(&int_port->list);
+
+ /* The following parameters are not used by the
+ * rcu readers of this int_port object so it is
+ * safe to release them.
+ */
+ if (int_port->rx_rule)
+ mlx5_del_flow_rules(int_port->rx_rule);
+ mapping_remove(ctx, int_port->mapping);
+ mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
+ kfree_rcu(int_port);
+ priv->num_ports--;
+}
+
+/* Must be called with rcu_read_lock held */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv,
+ u32 metadata)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ list_for_each_entry_rcu(int_port, &priv->int_ports, list)
+ if (int_port->match_metadata == metadata)
+ return int_port;
+
+ return NULL;
+}
+
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ if (!priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&priv->int_ports_lock);
+
+ /* Reject request if ul rep not ready */
+ if (!priv->ul_rep_rx_ready) {
+ int_port = ERR_PTR(-EOPNOTSUPP);
+ goto done;
+ }
+
+ int_port = mlx5e_int_port_lookup(priv, ifindex, type);
+ if (int_port)
+ goto done;
+
+ /* Alloc and add new int port to list */
+ int_port = mlx5e_int_port_add(priv, ifindex, type);
+
+done:
+ mutex_unlock(&priv->int_ports_lock);
+
+ return int_port;
+}
+
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port)
+{
+ if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock))
+ return;
+
+ mlx5e_int_port_remove(priv, int_port);
+ mutex_unlock(&priv->int_ports_lock);
+}
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_int_port_priv *int_port_priv;
+ u64 mapping_id;
+
+ if (!mlx5e_tc_int_port_supported(esw))
+ return NULL;
+
+ int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL);
+ if (!int_port_priv)
+ return NULL;
+
+ mapping_id = mlx5_query_nic_system_image_guid(priv->mdev);
+
+ int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT,
+ sizeof(u32) * 2,
+ (1 << ESW_VPORT_BITS) - 1, true);
+ if (IS_ERR(int_port_priv->metadata_mapping)) {
+ mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n",
+ PTR_ERR(int_port_priv->metadata_mapping));
+ goto err_mapping;
+ }
+
+ int_port_priv->dev = priv->mdev;
+ mutex_init(&int_port_priv->int_ports_lock);
+ INIT_LIST_HEAD(&int_port_priv->int_ports);
+
+ return int_port_priv;
+
+err_mapping:
+ kfree(int_port_priv);
+
+ return NULL;
+}
+
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv)
+{
+ if (!priv)
+ return;
+
+ mutex_destroy(&priv->int_ports_lock);
+ mapping_destroy(priv->metadata_mapping);
+ kfree(priv);
+}
+
+/* Int port rx rules reside in the ul rep rx tables.
+ * It is possible the ul rep will go down while there are
+ * still int port rules in its rx table, so proper cleanup
+ * is required to free resources.
+ */
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_tc_int_port_priv *ppriv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ ppriv = uplink_priv->int_port_priv;
+
+ if (!ppriv)
+ return;
+
+ mutex_lock(&ppriv->int_ports_lock);
+ ppriv->ul_rep_rx_ready = true;
+ mutex_unlock(&ppriv->int_ports_lock);
+}
+
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_tc_int_port_priv *ppriv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_int_port *int_port;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ ppriv = uplink_priv->int_port_priv;
+
+ if (!ppriv)
+ return;
+
+ mutex_lock(&ppriv->int_ports_lock);
+
+ ppriv->ul_rep_rx_ready = false;
+
+ list_for_each_entry(int_port, &ppriv->int_ports, list) {
+ if (!IS_ERR_OR_NULL(int_port->rx_rule))
+ mlx5_del_flow_rules(int_port->rx_rule);
+
+ int_port->rx_rule = NULL;
+ }
+
+ mutex_unlock(&ppriv->int_ports_lock);
+}
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+ struct sk_buff *skb, u32 int_vport_metadata,
+ bool *forward_tx)
+{
+ enum mlx5e_tc_int_port_type fwd_type;
+ struct mlx5e_tc_int_port *int_port;
+ struct net_device *dev;
+ int ifindex;
+
+ if (!priv)
+ return false;
+
+ rcu_read_lock();
+ int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata);
+ if (!int_port) {
+ rcu_read_unlock();
+ mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n",
+ int_vport_metadata);
+ return false;
+ }
+
+ ifindex = int_port->ifindex;
+ fwd_type = int_port->type;
+ rcu_read_unlock();
+
+ dev = dev_get_by_index(&init_net, ifindex);
+ if (!dev) {
+ mlx5_core_dbg(priv->dev,
+ "Couldn't find internal port device with ifindex: %d\n",
+ ifindex);
+ return false;
+ }
+
+ skb->skb_iif = dev->ifindex;
+ skb->dev = dev;
+
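+ /* For an ingress internal port the packet is handed to the stack as if
+ * received on the internal device, while for egress the MAC header is
+ * pushed back and the caller is expected to transmit the packet out of
+ * that device (forward_tx).
+ */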
+ if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) {
+ skb->pkt_type = PACKET_HOST;
+ skb_set_redirected(skb, true);
+ *forward_tx = false;
+ } else {
+ skb_reset_network_header(skb);
+ skb_push_rcsum(skb, skb->mac_len);
+ skb_set_redirected(skb, false);
+ *forward_tx = true;
+ }
+
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
new file mode 100644
index 000000000..e72c79d30
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_INT_PORT_H__
+#define __MLX5_EN_TC_INT_PORT_H__
+
+#include "en.h"
+
+struct mlx5e_tc_int_port;
+struct mlx5e_tc_int_port_priv;
+
+enum mlx5e_tc_int_port_type {
+ MLX5E_TC_INT_PORT_INGRESS,
+ MLX5E_TC_INT_PORT_EGRESS,
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw);
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv);
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv);
+
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv);
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv);
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+ struct sk_buff *skb, u32 int_vport_metadata,
+ bool *forward_tx);
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type);
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port);
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port);
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port);
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline u32
+mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+ return 0;
+}
+
+static inline int
+mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+ return 0;
+}
+
+static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+ return false;
+}
+
+static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {}
+static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+#endif /* __MLX5_EN_TC_INT_PORT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
new file mode 100644
index 000000000..be74e1403
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/math64.h>
+#include "lib/aso.h"
+#include "en/tc/post_act.h"
+#include "meter.h"
+#include "en/tc_priv.h"
+
+#define MLX5_START_COLOR_SHIFT 28
+#define MLX5_METER_MODE_SHIFT 24
+#define MLX5_CBS_EXP_SHIFT 24
+#define MLX5_CBS_MAN_SHIFT 16
+#define MLX5_CIR_EXP_SHIFT 8
+
+/* cir = 8*(10^9)*cir_mantissa/(2^cir_exponent) bits/s */
+#define MLX5_CONST_CIR 8000000000ULL
+#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e))
+#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1)
+
+/* cbs = cbs_mantissa*2^cbs_exponent */
+#define MLX5_CALC_CBS(m, e) ((m) << (e))
+#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1)
+#define MLX5_MAX_HW_CBS 0x7FFFFFFF
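+/* Worked example for the CIR encoding above: a rate of 10^9 bits/s is
+ * represented exactly by cir_mantissa = 1, cir_exponent = 3, since
+ * 8*(10^9)*1 / 2^3 = 10^9. mlx5e_flow_meter_cir_calc() below simply scans
+ * all exponents and keeps the mantissa/exponent pair with the smallest
+ * error.
+ */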
+
+struct mlx5e_flow_meter_aso_obj {
+ struct list_head entry;
+ int base_id;
+ int total_meters;
+
+ unsigned long meters_map[]; /* must be at the end of this struct */
+};
+
+struct mlx5e_flow_meters {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_aso *aso;
+ struct mutex aso_lock; /* Protects aso operations */
+ int log_granularity;
+ u32 pdn;
+
+ DECLARE_HASHTABLE(hashtbl, 8);
+
+ struct mutex sync_lock; /* protect flow meter operations */
+ struct list_head partial_list;
+ struct list_head full_list;
+
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_post_act *post_act;
+};
+
+static void
+mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp)
+{
+ s64 _cir, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cir << e;
+ if ((s64)m < 0) /* overflow */
+ break;
+ m = div64_u64(m, MLX5_CONST_CIR);
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cir = MLX5_CALC_CIR(m, e);
+ _delta = cir - _cir;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+static void
+mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp)
+{
+ s64 _cbs, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cbs >> e;
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cbs = MLX5_CALC_CBS(m, e);
+ _delta = cbs - _cbs;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params)
+{
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl;
+ struct mlx5_wqe_aso_data_seg *aso_data;
+ struct mlx5e_flow_meters *flow_meters;
+ u8 cir_man, cir_exp, cbs_man, cbs_exp;
+ struct mlx5_aso_wqe *aso_wqe;
+ unsigned long expires;
+ struct mlx5_aso *aso;
+ u64 rate, burst;
+ u8 ds_cnt;
+ int err;
+
+ rate = meter_params->rate;
+ burst = meter_params->burst;
+
+ /* HW treats each packet as 128 bytes in PPS mode, so scale the rate
+ * to bits/s (<< 10 = * 128 * 8) and the burst to bytes (<< 7 = * 128).
+ */
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS) {
+ rate <<= 10;
+ burst <<= 7;
+ }
+
+ if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS)
+ return -EINVAL;
+
+ /* HW has limitation of total 31 bits for cbs */
+ if (burst > MLX5_MAX_HW_CBS) {
+ mlx5_core_warn(mdev,
+ "burst(%lld) is too large, use HW allowed value(%d)\n",
+ burst, MLX5_MAX_HW_CBS);
+ burst = MLX5_MAX_HW_CBS;
+ }
+
+ mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode);
+ mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp);
+ mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n",
+ rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man);
+ mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp);
+ mlx5_core_dbg(mdev, "burst=%lld, cbs=%lld, exp=%d, man=%d\n",
+ burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man);
+
+ if (!cir_man || !cbs_man)
+ return -EINVAL;
+
+ flow_meters = meter->flow_meters;
+ aso = flow_meters->aso;
+
+ mutex_lock(&flow_meters->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(aso);
+ ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS);
+ mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER);
+
+ aso_ctrl = &aso_wqe->aso_ctrl;
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6;
+ aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE |
+ MLX5_ASO_ALWAYS_TRUE << 4;
+ aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6;
+ aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32));
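+ /* One ASO object carries two meters side by side in the 64-byte data
+ * segment; the mask above and the meter->idx * 8 word offsets below select
+ * which of the two halves this WQE modifies.
+ */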
+
+ aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1);
+ memset(aso_data, 0, sizeof(*aso_data));
+ aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */
+ (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT));
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS)
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT);
+ else
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT);
+
+ aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) |
+ (cbs_man << MLX5_CBS_MAN_SHIFT) |
+ (cir_exp << MLX5_CIR_EXP_SHIFT) |
+ cir_man);
+
+ mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl);
+
+ /* With newer FW, completion of the first ASO WQE can take more than 2us, so allow up to 10ms. */
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ err = mlx5_aso_poll_cq(aso, true);
+ if (err)
+ usleep_range(2, 10);
+ } while (err && time_is_after_jiffies(expires));
+ mutex_unlock(&flow_meters->aso_lock);
+
+ return err;
+}
+
+static int
+mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ void *obj;
+ int err;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity);
+
+ obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj);
+ MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err) {
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id);
+ }
+
+ return err;
+}
+
+static void
+mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
+{
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5_fc *counter;
+ int err, pos, total;
+ u32 id;
+
+ meter = kzalloc(sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_red_counter;
+ }
+ meter->red_counter = counter;
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_green_counter;
+ }
+ meter->green_counter = counter;
+
+ meters_obj = list_first_entry_or_null(&flow_meters->partial_list,
+ struct mlx5e_flow_meter_aso_obj,
+ entry);
+ /* 2 meters in one object */
+ total = 1 << (flow_meters->log_granularity + 1);
+ if (!meters_obj) {
+ err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create flow meter ASO object\n");
+ goto err_create;
+ }
+
+ meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total),
+ GFP_KERNEL);
+ if (!meters_obj) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ meters_obj->base_id = id;
+ meters_obj->total_meters = total;
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ pos = 0;
+ } else {
+ pos = find_first_zero_bit(meters_obj->meters_map, total);
+ if (bitmap_weight(meters_obj->meters_map, total) == total - 1)
+ list_move(&meters_obj->entry, &flow_meters->full_list);
+ }
+
+ bitmap_set(meters_obj->meters_map, pos, 1);
+ meter->flow_meters = flow_meters;
+ meter->meters_obj = meters_obj;
+ meter->obj_id = meters_obj->base_id + pos / 2;
+ meter->idx = pos % 2;
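+ /* Example: with log_granularity = 6 one allocation covers a range of
+ * 64 ASO objects, i.e. 128 meters; a free bit at pos = 5 maps to
+ * obj_id = base_id + 2 and idx = 1, the second meter of the third
+ * object in the range.
+ */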
+
+ mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+
+ return meter;
+
+err_mem:
+ mlx5e_flow_meter_destroy_aso_obj(mdev, id);
+err_create:
+ mlx5_fc_destroy(mdev, meter->green_counter);
+err_green_counter:
+ mlx5_fc_destroy(mdev, meter->red_counter);
+err_red_counter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static void
+__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ int n, pos;
+
+ mlx5_fc_destroy(mdev, meter->green_counter);
+ mlx5_fc_destroy(mdev, meter->red_counter);
+
+ meters_obj = meter->meters_obj;
+ pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx;
+ bitmap_clear(meters_obj->meters_map, pos, 1);
+ n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters);
+ if (n == 0) {
+ list_del(&meters_obj->entry);
+ mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id);
+ kfree(meters_obj);
+ } else if (n == meters_obj->total_meters - 1) {
+ list_move(&meters_obj->entry, &flow_meters->partial_list);
+ }
+
+ mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+ kfree(meter);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index)
+ if (meter->params.index == index)
+ goto add_ref;
+
+ return ERR_PTR(-ENOENT);
+
+add_ref:
+ meter->refcnt++;
+
+ return meter;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ mutex_unlock(&flow_meters->sync_lock);
+
+ return meter;
+}
+
+static void
+__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ if (--meter->refcnt == 0) {
+ hash_del(&meter->hlist);
+ __mlx5e_flow_meter_free(meter);
+ }
+}
+
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+
+ mutex_lock(&flow_meters->sync_lock);
+ __mlx5e_tc_meter_put(meter);
+ mutex_unlock(&flow_meters->sync_lock);
+}
+
+static struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = __mlx5e_flow_meter_alloc(flow_meters);
+ if (IS_ERR(meter))
+ return meter;
+
+ hash_add(flow_meters->hashtbl, &meter->hlist, params->index);
+ meter->params.index = params->index;
+ meter->refcnt++;
+
+ return meter;
+}
+
+static int
+__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ int err = 0;
+
+ if (meter->params.mode != params->mode || meter->params.rate != params->rate ||
+ meter->params.burst != params->burst) {
+ err = mlx5e_tc_meter_modify(mdev, meter, params);
+ if (err)
+ goto out;
+
+ meter->params.mode = params->mode;
+ meter->params.rate = params->rate;
+ meter->params.burst = params->burst;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&flow_meters->sync_lock);
+ err = __mlx5e_tc_meter_update(meter, params);
+ mutex_unlock(&flow_meters->sync_lock);
+ return err;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ if (IS_ERR(meter)) {
+ meter = mlx5e_tc_meter_alloc(flow_meters, params);
+ if (IS_ERR(meter)) {
+ err = PTR_ERR(meter);
+ goto err_get;
+ }
+ }
+
+ err = __mlx5e_tc_meter_update(meter, params);
+ if (err)
+ goto err_update;
+
+ mutex_unlock(&flow_meters->sync_lock);
+ return meter;
+
+err_update:
+ __mlx5e_tc_meter_put(meter);
+err_get:
+ mutex_unlock(&flow_meters->sync_lock);
+ return ERR_PTR(err);
+}
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters)
+{
+ return flow_meters->ns_type;
+}
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (IS_ERR_OR_NULL(post_act)) {
+ netdev_dbg(priv->netdev,
+ "flow meter offload is not supported, post action is missing\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL);
+ if (!flow_meters)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err);
+ goto err_out;
+ }
+
+ flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn);
+ if (IS_ERR(flow_meters->aso)) {
+ mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n");
+ err = PTR_ERR(flow_meters->aso);
+ goto err_sq;
+ }
+
+ mutex_init(&flow_meters->sync_lock);
+ INIT_LIST_HEAD(&flow_meters->partial_list);
+ INIT_LIST_HEAD(&flow_meters->full_list);
+
+ flow_meters->ns_type = ns_type;
+ flow_meters->mdev = mdev;
+ flow_meters->post_act = post_act;
+ mutex_init(&flow_meters->aso_lock);
+ flow_meters->log_granularity = min_t(int, 6,
+ MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc));
+
+ return flow_meters;
+
+err_sq:
+ mlx5_core_dealloc_pd(mdev, flow_meters->pdn);
+err_out:
+ kfree(flow_meters);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters)
+{
+ if (IS_ERR_OR_NULL(flow_meters))
+ return;
+
+ mlx5_aso_destroy(flow_meters->aso);
+ mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn);
+ kfree(flow_meters);
+}
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse)
+{
+ u64 bytes1, packets1, lastuse1;
+ u64 bytes2, packets2, lastuse2;
+
+ mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1);
+ mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2);
+
+ *bytes = bytes1 + bytes2;
+ *packets = packets1 + packets2;
+ *drops = packets2;
+ *lastuse = max_t(u64, lastuse1, lastuse2);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
new file mode 100644
index 000000000..6de6e8a16
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_FLOW_METER_H__
+#define __MLX5_EN_FLOW_METER_H__
+
+struct mlx5e_post_meter_priv;
+struct mlx5e_flow_meter_aso_obj;
+struct mlx5e_flow_meters;
+struct mlx5_flow_attr;
+
+enum mlx5e_flow_meter_mode {
+ MLX5_RATE_LIMIT_BPS,
+ MLX5_RATE_LIMIT_PPS,
+};
+
+struct mlx5e_flow_meter_params {
+ enum mlx5e_flow_meter_mode mode;
+ /* police action index */
+ u32 index;
+ u64 rate;
+ u64 burst;
+};
+
+struct mlx5e_flow_meter_handle {
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ u32 obj_id;
+ u8 idx;
+
+ int refcnt;
+ struct hlist_node hlist;
+ struct mlx5e_flow_meter_params params;
+
+ struct mlx5_fc *green_counter;
+ struct mlx5_fc *red_counter;
+};
+
+struct mlx5e_meter_attr {
+ struct mlx5e_flow_meter_params params;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5e_post_meter_priv *post_meter;
+};
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params);
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter);
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params);
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters);
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_action);
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters);
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse);
+
+#endif /* __MLX5_EN_FLOW_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
new file mode 100644
index 000000000..0290e0dea
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "en_tc.h"
+#include "post_act.h"
+#include "mlx5_core.h"
+#include "fs_core.h"
+
+struct mlx5e_post_act {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_flow_table *ft;
+ struct mlx5e_priv *priv;
+ struct xarray ids;
+};
+
+struct mlx5e_post_act_handle {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_handle *rule;
+ u32 id;
+};
+
+#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK
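+/* Post action handles are identified by an fte_id allocated from the ids
+ * xarray, bounded by the number of bits the FTEID_TO_REG mapping provides;
+ * the id is written to that register via mlx5e_tc_post_act_set_handle() and
+ * matched again by the rule installed in mlx5e_tc_post_act_offload().
+ */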
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ enum fs_flow_table_type table_type = ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FS_FT_FDB : FS_FT_NIC_RX;
+ struct mlx5e_post_act *post_act;
+ int err;
+
+ if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) {
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
+ mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+ err = -EOPNOTSUPP;
+ goto err_check;
+ }
+
+ post_act = kzalloc(sizeof(*post_act), GFP_KERNEL);
+ if (!post_act) {
+ err = -ENOMEM;
+ goto err_check;
+ }
+ post_act->ft = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(post_act->ft)) {
+ err = PTR_ERR(post_act->ft);
+ mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err);
+ goto err_ft;
+ }
+ post_act->chains = chains;
+ post_act->ns_type = ns_type;
+ post_act->priv = priv;
+ xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1);
+ return post_act;
+
+err_ft:
+ kfree(post_act);
+err_check:
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
+{
+ if (IS_ERR_OR_NULL(post_act))
+ return;
+
+ xa_destroy(&post_act->ids);
+ mlx5_chains_destroy_global_table(post_act->chains, post_act->ft);
+ kfree(post_act);
+}
+
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* The post action rule matches on the fte_id and executes the original tc rule's action */
+ mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK);
+
+ handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr);
+ if (IS_ERR(handle->rule)) {
+ err = PTR_ERR(handle->rule);
+ netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
+ goto err_rule;
+ }
+
+ kvfree(spec);
+ return 0;
+
+err_rule:
+ kvfree(spec);
+ return err;
+}
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr)
+{
+ struct mlx5e_post_act_handle *handle;
+ int err;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = post_act->ft;
+ post_attr->inner_match_level = MLX5_MATCH_NONE;
+ post_attr->outer_match_level = MLX5_MATCH_NONE;
+ post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+
+ handle->ns_type = post_act->ns_type;
+ /* Splits were handled before post action */
+ if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ post_attr->esw_attr->split_count = 0;
+
+ err = xa_alloc(&post_act->ids, &handle->id, post_attr,
+ XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL);
+ if (err)
+ goto err_xarray;
+
+ handle->attr = post_attr;
+
+ return handle;
+
+err_xarray:
+ kfree(handle);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr);
+ handle->rule = NULL;
+}
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
+{
+ if (!IS_ERR_OR_NULL(handle->rule))
+ mlx5e_tc_post_act_unoffload(post_act, handle);
+ xa_erase(&post_act->ids, handle->id);
+ kfree(handle);
+}
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act)
+{
+ return post_act->ft;
+}
+
+/* Allocate a header modify action to write the post action handle fte id to a register. */
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts)
+{
+ return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
new file mode 100644
index 000000000..40b8df184
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_POST_ACTION_H__
+#define __MLX5_POST_ACTION_H__
+
+#include "en.h"
+#include "lib/fs_chains.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_priv;
+struct mlx5e_tc_mod_hdr_acts;
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act);
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr);
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
+
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts);
+
+#endif /* __MLX5_POST_ACTION_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
new file mode 100644
index 000000000..8b77e8228
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "post_meter.h"
+#include "en/tc/post_act.h"
+
+#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG)
+#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG)
+
+struct mlx5e_post_meter_priv {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_handle *fwd_green_rule;
+ struct mlx5_flow_handle *drop_red_rule;
+};
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return post_meter->ft;
+}
+
+static int
+mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+
+ root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type);
+ if (!root_ns) {
+ mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(post_meter->ft)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n");
+ return PTR_ERR(post_meter->ft);
+ }
+
+ return 0;
+}
+
+static int
+mlx5e_post_meter_fg_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *misc2, *match_criteria;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in);
+ if (IS_ERR(post_meter->fg)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n");
+ err = PTR_ERR(post_meter->fg);
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static int
+mlx5e_post_meter_rules_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[0].counter_id = mlx5_fc_id(red_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n");
+ err = PTR_ERR(rule);
+ goto err_red;
+ }
+ post_meter->drop_red_rule = rule;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[0].ft = mlx5e_tc_post_act_get_ft(post_act);
+ dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[1].counter_id = mlx5_fc_id(green_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n");
+ err = PTR_ERR(rule);
+ goto err_green;
+ }
+ post_meter->fwd_green_rule = rule;
+
+ kvfree(spec);
+ return 0;
+
+err_green:
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+err_red:
+ kvfree(spec);
+ return err;
+}
+
+static void
+mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+ mlx5_del_flow_rules(post_meter->fwd_green_rule);
+}
+
+static void
+mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_group(post_meter->fg);
+}
+
+static void
+mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_table(post_meter->ft);
+}
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5e_post_meter_priv *post_meter;
+ int err;
+
+ post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL);
+ if (!post_meter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_post_meter_table_create(priv, ns_type, post_meter);
+ if (err)
+ goto err_ft;
+
+ err = mlx5e_post_meter_fg_create(priv, post_meter);
+ if (err)
+ goto err_fg;
+
+ err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter,
+ red_counter);
+ if (err)
+ goto err_rules;
+
+ return post_meter;
+
+err_rules:
+ mlx5e_post_meter_fg_destroy(post_meter);
+err_fg:
+ mlx5e_post_meter_table_destroy(post_meter);
+err_ft:
+ kfree(post_meter);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5e_post_meter_rules_destroy(post_meter);
+ mlx5e_post_meter_fg_destroy(post_meter);
+ mlx5e_post_meter_table_destroy(post_meter);
+ kfree(post_meter);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
new file mode 100644
index 000000000..34d0e4b9f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_POST_METER_H__
+#define __MLX5_EN_POST_METER_H__
+
+#define packet_color_to_reg { \
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \
+ .moffset = 0, \
+ .mlen = 8, \
+ .soffset = MLX5_BYTE_OFF(fte_match_param, \
+ misc_parameters_2.metadata_reg_c_5), \
+}
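+/* The packet color produced by the flow meter is carried in the low 8 bits
+ * of metadata register C5; the post_meter table (see post_meter.c) matches
+ * on it to forward (and count) green packets and drop (and count) red ones.
+ */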
+
+struct mlx5e_post_meter_priv;
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter);
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter);
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter);
+
+#endif /* __MLX5_EN_POST_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
new file mode 100644
index 000000000..c57b09727
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/skbuff.h>
+#include <net/psample.h>
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "en/tc/act/sample.h"
+#include "en/mod_hdr.h"
+#include "sample.h"
+#include "eswitch.h"
+#include "en_tc.h"
+#include "fs_core.h"
+
+#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
+
+static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
+ .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
+ .max_num_groups = 0, /* default num of groups */
+ .flags = 0,
+};
+
+struct mlx5e_tc_psample {
+ struct mlx5_eswitch *esw;
+ struct mlx5_flow_table *termtbl;
+ struct mlx5_flow_handle *termtbl_rule;
+ DECLARE_HASHTABLE(hashtbl, 8);
+ struct mutex ht_lock; /* protect hashtbl */
+ DECLARE_HASHTABLE(restore_hashtbl, 8);
+ struct mutex restore_lock; /* protect restore_hashtbl */
+ struct mlx5e_post_act *post_act;
+};
+
+struct mlx5e_sampler {
+ struct hlist_node hlist;
+ u32 sampler_id;
+ u32 sample_ratio;
+ u32 sample_table_id;
+ u32 default_table_id;
+ int count;
+};
+
+struct mlx5e_sample_flow {
+ struct mlx5e_sampler *sampler;
+ struct mlx5e_sample_restore *restore;
+ struct mlx5_flow_attr *pre_attr;
+ struct mlx5_flow_handle *pre_rule;
+ struct mlx5_flow_attr *post_attr;
+ struct mlx5_flow_handle *post_rule;
+};
+
+struct mlx5e_sample_restore {
+ struct hlist_node hlist;
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_flow_handle *rule;
+ u32 obj_id;
+ int count;
+};
+
+static int
+sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_act act = {};
+ int err;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) {
+ mlx5_core_warn(dev, "termination table is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ mlx5_core_warn(dev, "failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 1;
+ ft_attr.level = 1;
+ tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(tc_psample->termtbl)) {
+ err = PTR_ERR(tc_psample->termtbl);
+ mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
+ return err;
+ }
+
+ act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.vport.num = esw->manager_vport;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
+ if (IS_ERR(tc_psample->termtbl_rule)) {
+ err = PTR_ERR(tc_psample->termtbl_rule);
+ mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample)
+{
+ mlx5_del_flow_rules(tc_psample->termtbl_rule);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
+}
+
+static int
+sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler)
+{
+ u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u64 general_obj_types;
+ void *obj;
+ int err;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER))
+ return -EOPNOTSUPP;
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
+ return -EOPNOTSUPP;
+
+ obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object);
+ MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB);
+ MLX5_SET(sampler_obj, obj, ignore_flow_level, 1);
+ MLX5_SET(sampler_obj, obj, level, 1);
+ MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio);
+ MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id);
+ MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id);
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void
+sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
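+/* Samplers are cached and shared per (sample_ratio, default_table_id) pair. */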
+static u32
+sampler_hash(u32 sample_ratio, u32 default_table_id)
+{
+ return jhash_2words(sample_ratio, default_table_id, 0);
+}
+
+static int
+sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2)
+{
+ return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
+}
+
+static struct mlx5e_sampler *
+sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id)
+{
+ struct mlx5e_sampler *sampler;
+ u32 hash_key;
+ int err;
+
+ mutex_lock(&tc_psample->ht_lock);
+ hash_key = sampler_hash(sample_ratio, default_table_id);
+ hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key)
+ if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
+ sample_ratio, default_table_id))
+ goto add_ref;
+
+ sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
+ if (!sampler) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ sampler->sample_table_id = tc_psample->termtbl->id;
+ sampler->default_table_id = default_table_id;
+ sampler->sample_ratio = sample_ratio;
+
+ err = sampler_obj_create(tc_psample->esw->dev, sampler);
+ if (err)
+ goto err_create;
+
+ hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key);
+
+add_ref:
+ sampler->count++;
+ mutex_unlock(&tc_psample->ht_lock);
+ return sampler;
+
+err_create:
+ kfree(sampler);
+err_alloc:
+ mutex_unlock(&tc_psample->ht_lock);
+ return ERR_PTR(err);
+}
+
+static void
+sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
+{
+ mutex_lock(&tc_psample->ht_lock);
+ if (--sampler->count == 0) {
+ hash_del(&sampler->hlist);
+ sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id);
+ kfree(sampler);
+ }
+ mutex_unlock(&tc_psample->ht_lock);
+}
+
+/* obj_id is used to restore the sample parameters.
+ * Set fte_id in original flow table, then match it in the default table.
+ * Only set it for NICs that can preserve reg_c, or when there is a decap
+ * action. For other cases, use the same match in the default table.
+ * Use one header rewrite for both obj_id and fte_id.
+ */
+static struct mlx5_modify_hdr *
+sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ CHAIN_TO_REG, obj_id);
+ if (err)
+ goto err_set_regc0;
+
+ modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts->num_actions,
+ mod_acts->actions);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ goto err_modify_hdr;
+ }
+
+ mlx5e_mod_hdr_dealloc(mod_acts);
+ return modify_hdr;
+
+err_modify_hdr:
+ mlx5e_mod_hdr_dealloc(mod_acts);
+err_set_regc0:
+ return ERR_PTR(err);
+}
+
+static struct mlx5e_sample_restore *
+sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_core_dev *mdev = esw->dev;
+ struct mlx5e_sample_restore *restore;
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ mutex_lock(&tc_psample->restore_lock);
+ hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id)
+ if (restore->obj_id == obj_id)
+ goto add_ref;
+
+ restore = kzalloc(sizeof(*restore), GFP_KERNEL);
+ if (!restore) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ restore->obj_id = obj_id;
+
+ modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ goto err_modify_hdr;
+ }
+ restore->modify_hdr = modify_hdr;
+
+ restore->rule = esw_add_restore_rule(esw, obj_id);
+ if (IS_ERR(restore->rule)) {
+ err = PTR_ERR(restore->rule);
+ goto err_restore;
+ }
+
+ hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id);
+add_ref:
+ restore->count++;
+ mutex_unlock(&tc_psample->restore_lock);
+ return restore;
+
+err_restore:
+ mlx5_modify_header_dealloc(mdev, restore->modify_hdr);
+err_modify_hdr:
+ kfree(restore);
+err_alloc:
+ mutex_unlock(&tc_psample->restore_lock);
+ return ERR_PTR(err);
+}
+
+static void
+sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore)
+{
+ mutex_lock(&tc_psample->restore_lock);
+ if (--restore->count == 0)
+ hash_del(&restore->hlist);
+ mutex_unlock(&tc_psample->restore_lock);
+
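+	/* Release the HW resources and free the entry outside the lock once the last reference is gone. */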
+ if (!restore->count) {
+ mlx5_del_flow_rules(restore->rule);
+ mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr);
+ kfree(restore);
+ }
+}
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
+{
+ u32 trunc_size = mapped_obj->sample.trunc_size;
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+
+ md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len;
+ md.in_ifindex = skb->dev->ifindex;
+ psample_group.group_num = mapped_obj->sample.group_id;
+ psample_group.net = &init_net;
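+	/* Push the MAC header back so the sampled packet includes the full L2 frame. */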
+ skb_push(skb, skb->mac_len);
+
+ psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
+}
+
+static int
+add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr,
+ u32 *default_tbl_id)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB);
+ struct mlx5_vport_tbl_attr per_vport_tbl_attr;
+ struct mlx5_flow_table *default_tbl;
+ struct mlx5_flow_attr *post_attr;
+ int err;
+
+ /* Allocate default table per vport, chain and prio. Otherwise, there is
+ * only one default table for the same sampler object. Rules with different
+ * prio and chain may overlap. For CT sample action, per vport default
+	 * table is needed to restore the metadata.
+ */
+ per_vport_tbl_attr.chain = attr->chain;
+ per_vport_tbl_attr.prio = attr->prio;
+ per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
+ per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
+ if (IS_ERR(default_tbl)) {
+ err = PTR_ERR(default_tbl);
+ goto err_default_tbl;
+ }
+ *default_tbl_id = default_tbl->id;
+
+ post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!post_attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+ sample_flow->post_attr = post_attr;
+ memcpy(post_attr, attr, attr_sz);
+ /* Perform the original matches on the default table.
+ * Offload all actions except the sample action.
+ */
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = default_tbl;
+ post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT;
+
+ /* When offloading sample and encap action, if there is no valid
+ * neigh data struct, a slow path rule is offloaded first. Source
+ * port metadata match is set at that time. A per vport table is
+ * already allocated. No need to match it again. So clear the source
+ * port metadata match.
+ */
+ mlx5_eswitch_clear_rule_source_port(esw, spec);
+ sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr);
+ if (IS_ERR(sample_flow->post_rule)) {
+ err = PTR_ERR(sample_flow->post_rule);
+ goto err_rule;
+ }
+ return 0;
+
+err_rule:
+ kfree(post_attr);
+err_attr:
+ mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
+err_default_tbl:
+ return err;
+}
+
+static void
+del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_vport_tbl_attr tbl_attr;
+
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr);
+ kfree(sample_flow->post_attr);
+ tbl_attr.chain = attr->chain;
+ tbl_attr.prio = attr->prio;
+ tbl_attr.vport = esw_attr->in_rep->vport;
+ tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ mlx5_esw_vporttbl_put(esw, &tbl_attr);
+}
+
+/* For the following typical flow table:
+ *
+ * +-------------------------------+
+ * + original flow table +
+ * +-------------------------------+
+ * + original match +
+ * +-------------------------------+
+ * + sample action + other actions +
+ * +-------------------------------+
+ *
+ * We translate the tc filter with sample action to the following HW model:
+ *
+ * +---------------------+
+ * + original flow table +
+ * +---------------------+
+ * + original match +
+ * +---------------------+
+ * | set fte_id (if reg_c preserve cap)
+ * | do decap (if required)
+ * v
+ * +------------------------------------------------+
+ * + Flow Sampler Object +
+ * +------------------------------------------------+
+ * + sample ratio +
+ * +------------------------------------------------+
+ * + sample table id | default table id +
+ * +------------------------------------------------+
+ * | |
+ * v v
+ * +-----------------------------+ +-------------------+
+ * + sample table + + default table +
+ * +-----------------------------+ +-------------------+
+ * + forward to management vport + |
+ * +-----------------------------+ |
+ * +-------+------+
+ * | |reg_c preserve cap
+ * | |or decap action
+ * v v
+ * +-----------------+ +-------------+
+ * + per vport table + + post action +
+ * +-----------------+ +-------------+
+ * + original match +
+ * +-----------------+
+ * + other actions +
+ * +-----------------+
+ */
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_esw_flow_attr *pre_esw_attr;
+ struct mlx5_mapped_obj restore_obj = {};
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5e_sample_attr *sample_attr;
+ struct mlx5_flow_attr *pre_attr;
+ struct mlx5_eswitch *esw;
+ u32 default_tbl_id;
+ u32 obj_id;
+ int err;
+
+ if (IS_ERR_OR_NULL(tc_psample))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
+ if (!sample_flow)
+ return ERR_PTR(-ENOMEM);
+ sample_attr = &attr->sample_attr;
+ sample_attr->sample_flow = sample_flow;
+
+ /* For NICs with reg_c_preserve support or decap action, use
+ * post action instead of the per vport, chain and prio table.
+	 * Match only the fte id instead of repeating the match from the
+	 * original flow table.
+ */
+ esw = tc_psample->esw;
+ if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) {
+ struct mlx5_flow_table *ft;
+
+ ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
+ default_tbl_id = ft->id;
+ } else {
+ err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
+ if (err)
+ goto err_post_rule;
+ }
+
+ /* Create sampler object. */
+ sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id);
+ if (IS_ERR(sample_flow->sampler)) {
+ err = PTR_ERR(sample_flow->sampler);
+ goto err_sampler;
+ }
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+
+ /* Create an id mapping reg_c0 value to sample object. */
+ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
+ restore_obj.sample.group_id = sample_attr->group_num;
+ restore_obj.sample.rate = sample_attr->rate;
+ restore_obj.sample.trunc_size = sample_attr->trunc_size;
+ restore_obj.sample.tunnel_id = attr->tunnel_id;
+ err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
+ if (err)
+ goto err_obj_id;
+ sample_attr->restore_obj_id = obj_id;
+
+ /* Create sample restore context. */
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+ sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts);
+ if (IS_ERR(sample_flow->restore)) {
+ err = PTR_ERR(sample_flow->restore);
+ goto err_sample_restore;
+ }
+
+ /* Perform the original matches on the original table. Offload the
+ * sample action. The destination is the sampler object.
+ */
+ pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!pre_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre_flow_attr;
+ }
+ pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ /* For decap action, do decap in the original flow table instead of the
+ * default flow table.
+ */
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
+ pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
+ pre_attr->inner_match_level = attr->inner_match_level;
+ pre_attr->outer_match_level = attr->outer_match_level;
+ pre_attr->chain = attr->chain;
+ pre_attr->prio = attr->prio;
+ pre_attr->ft = attr->ft;
+ pre_attr->sample_attr = *sample_attr;
+ pre_esw_attr = pre_attr->esw_attr;
+ pre_esw_attr->in_mdev = esw_attr->in_mdev;
+ pre_esw_attr->in_rep = esw_attr->in_rep;
+ sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
+ if (IS_ERR(sample_flow->pre_rule)) {
+ err = PTR_ERR(sample_flow->pre_rule);
+ goto err_pre_offload_rule;
+ }
+ sample_flow->pre_attr = pre_attr;
+
+ return sample_flow->pre_rule;
+
+err_pre_offload_rule:
+ kfree(pre_attr);
+err_alloc_pre_flow_attr:
+ sample_restore_put(tc_psample, sample_flow->restore);
+err_sample_restore:
+ mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
+err_obj_id:
+ sampler_put(tc_psample, sample_flow->sampler);
+err_sampler:
+ if (sample_flow->post_rule)
+ del_post_rule(esw, sample_flow, attr);
+err_post_rule:
+ kfree(sample_flow);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5_eswitch *esw;
+
+ if (IS_ERR_OR_NULL(tc_psample))
+ return;
+
+	/* The following delete order must not be changed; otherwise the
+	 * FW will return error syndromes.
+ */
+ esw = tc_psample->esw;
+ sample_flow = attr->sample_attr.sample_flow;
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
+
+ sample_restore_put(tc_psample, sample_flow->restore);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id);
+ sampler_put(tc_psample, sample_flow->sampler);
+ if (sample_flow->post_rule)
+ del_post_rule(esw, sample_flow, attr);
+
+ kfree(sample_flow->pre_attr);
+ kfree(sample_flow);
+}
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{
+ struct mlx5e_tc_psample *tc_psample;
+ int err;
+
+ tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL);
+ if (!tc_psample)
+ return ERR_PTR(-ENOMEM);
+ if (IS_ERR_OR_NULL(post_act)) {
+ err = PTR_ERR(post_act);
+ goto err_post_act;
+ }
+ tc_psample->post_act = post_act;
+ tc_psample->esw = esw;
+ err = sampler_termtbl_create(tc_psample);
+ if (err)
+ goto err_post_act;
+
+ mutex_init(&tc_psample->ht_lock);
+ mutex_init(&tc_psample->restore_lock);
+
+ return tc_psample;
+
+err_post_act:
+ kfree(tc_psample);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample)
+{
+ if (IS_ERR_OR_NULL(tc_psample))
+ return;
+
+ mutex_destroy(&tc_psample->restore_lock);
+ mutex_destroy(&tc_psample->ht_lock);
+ sampler_termtbl_destroy(tc_psample);
+ kfree(tc_psample);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
new file mode 100644
index 000000000..a569367ea
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_SAMPLE_H__
+#define __MLX5_EN_TC_SAMPLE_H__
+
+#include "eswitch.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_psample;
+struct mlx5e_post_act;
+
+struct mlx5e_sample_attr {
+ u32 group_num;
+ u32 rate;
+ u32 trunc_size;
+ u32 restore_obj_id;
+ u32 sampler_id;
+ struct mlx5e_sample_flow *sample_flow;
+};
+
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+
+#else /* CONFIG_MLX5_TC_SAMPLE */
+
+static inline struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr) {}
+
+static inline struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {}
+
+static inline void
+mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {}
+
+#endif /* CONFIG_MLX5_TC_SAMPLE */
+#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
new file mode 100644
index 000000000..f01f7dfdb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -0,0 +1,2272 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <linux/workqueue.h>
+#include <linux/refcount.h>
+#include <linux/xarray.h>
+#include <linux/if_macvlan.h>
+#include <linux/debugfs.h>
+
+#include "lib/fs_chains.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+
+#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
+#define MLX5_CT_STATE_TRK_BIT BIT(2)
+#define MLX5_CT_STATE_NAT_BIT BIT(3)
+#define MLX5_CT_STATE_REPLY_BIT BIT(4)
+#define MLX5_CT_STATE_RELATED_BIT BIT(5)
+#define MLX5_CT_STATE_INVALID_BIT BIT(6)
+
+#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
+#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
+
+/* Statically allocate modify actions for
+ * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
+ * This will be increased dynamically if needed (for the ipv6 snat + dnat).
+ */
+#define MLX5_CT_MIN_MOD_ACTS 10
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
+
+struct mlx5_tc_ct_debugfs {
+ struct {
+ atomic_t offloaded;
+ atomic_t rx_dropped;
+ } stats;
+
+ struct dentry *root;
+};
+
+struct mlx5_tc_ct_priv {
+ struct mlx5_core_dev *dev;
+ const struct net_device *netdev;
+ struct mod_hdr_tbl *mod_hdr_tbl;
+ struct xarray tuple_ids;
+ struct rhashtable zone_ht;
+ struct rhashtable ct_tuples_ht;
+ struct rhashtable ct_tuples_nat_ht;
+ struct mlx5_flow_table *ct;
+ struct mlx5_flow_table *ct_nat;
+ struct mlx5e_post_act *post_act;
+ struct mutex control_lock; /* guards parallel adds/dels */
+ struct mapping_ctx *zone_mapping;
+ struct mapping_ctx *labels_mapping;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_ct_fs *fs;
+ struct mlx5_ct_fs_ops *fs_ops;
+ spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
+
+ struct mlx5_tc_ct_debugfs debugfs;
+};
+
+struct mlx5_ct_flow {
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_flow_handle *pre_ct_rule;
+ struct mlx5_ct_ft *ft;
+ u32 chain_mapping;
+};
+
+struct mlx5_ct_zone_rule {
+ struct mlx5_ct_fs_rule *rule;
+ struct mlx5e_mod_hdr_handle *mh;
+ struct mlx5_flow_attr *attr;
+ bool nat;
+};
+
+struct mlx5_tc_ct_pre {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *flow_grp;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_modify_hdr *modify_hdr;
+};
+
+struct mlx5_ct_ft {
+ struct rhash_head node;
+ u16 zone;
+ u32 zone_restore_id;
+ refcount_t refcount;
+ struct nf_flowtable *nf_ft;
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct rhashtable ct_entries_ht;
+ struct mlx5_tc_ct_pre pre_ct;
+ struct mlx5_tc_ct_pre pre_ct_nat;
+};
+
+struct mlx5_ct_tuple {
+ u16 addr_type;
+ __be16 n_proto;
+ u8 ip_proto;
+ struct {
+ union {
+ __be32 src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ __be32 dst_v4;
+ struct in6_addr dst_v6;
+ };
+ } ip;
+ struct {
+ __be16 src;
+ __be16 dst;
+ } port;
+
+ u16 zone;
+};
+
+struct mlx5_ct_counter {
+ struct mlx5_fc *counter;
+ refcount_t refcount;
+ bool is_shared;
+};
+
+enum {
+ MLX5_CT_ENTRY_FLAG_VALID,
+};
+
+struct mlx5_ct_entry {
+ struct rhash_head node;
+ struct rhash_head tuple_node;
+ struct rhash_head tuple_nat_node;
+ struct mlx5_ct_counter *counter;
+ unsigned long cookie;
+ unsigned long restore_cookie;
+ struct mlx5_ct_tuple tuple;
+ struct mlx5_ct_tuple tuple_nat;
+ struct mlx5_ct_zone_rule zone_rules[2];
+
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct work_struct work;
+
+ refcount_t refcnt;
+ unsigned long flags;
+};
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh);
+
+static const struct rhashtable_params cts_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, node),
+ .key_offset = offsetof(struct mlx5_ct_entry, cookie),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params zone_params = {
+ .head_offset = offsetof(struct mlx5_ct_ft, node),
+ .key_offset = offsetof(struct mlx5_ct_ft, zone),
+ .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params tuples_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params tuples_nat_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
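+/* An entry has NAT iff it was inserted into the NAT tuples hashtable. */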
+static bool
+mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
+{
+ return !!(entry->tuple_nat_node.next);
+}
+
+static int
+mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
+ u32 *labels, u32 *id)
+{
+ if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
+ *id = 0;
+ return 0;
+ }
+
+ if (mapping_add(ct_priv->labels_mapping, labels, id))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void
+mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
+{
+ if (id)
+ mapping_remove(ct_priv->labels_mapping, id);
+}
+
+static int
+mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
+{
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+
+ flow_rule_match_basic(rule, &basic);
+ flow_rule_match_control(rule, &control);
+
+ tuple->n_proto = basic.key->n_proto;
+ tuple->ip_proto = basic.key->ip_proto;
+ tuple->addr_type = control.key->addr_type;
+
+ if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ tuple->ip.src_v4 = match.key->src;
+ tuple->ip.dst_v4 = match.key->dst;
+ } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ tuple->ip.src_v6 = match.key->src;
+ tuple->ip.dst_v6 = match.key->dst;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (tuple->ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ tuple->port.src = match.key->src;
+ tuple->port.dst = match.key->dst;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ } else {
+ if (tuple->ip_proto != IPPROTO_GRE)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
+ struct flow_rule *rule)
+{
+ struct flow_action *flow_action = &rule->action;
+ struct flow_action_entry *act;
+ u32 offset, val, ip6_offset;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE)
+ continue;
+
+ offset = act->mangle.offset;
+ val = act->mangle.val;
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ if (offset == offsetof(struct iphdr, saddr))
+ tuple->ip.src_v4 = cpu_to_be32(val);
+ else if (offset == offsetof(struct iphdr, daddr))
+ tuple->ip.dst_v4 = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
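+			/* Each mangle sets one 32-bit word: words 0-3 are saddr, words 4-7 are daddr. */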
+ ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
+ ip6_offset /= 4;
+ if (ip6_offset < 4)
+ tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
+ else if (ip6_offset < 8)
+ tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ if (offset == offsetof(struct tcphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct tcphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ if (offset == offsetof(struct udphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct udphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct net_device *ndev)
+{
+ struct mlx5e_priv *other_priv = netdev_priv(ndev);
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ bool vf_rep, uplink_rep;
+
+ vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+ uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+
+ if (vf_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ if (uplink_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ if (is_vlan_dev(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
+ if (netif_is_macvlan(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
+ if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_rule *rule)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+
+ mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ match.mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ match.key->ip_proto);
+
+ ip_proto = match.key->ip_proto;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.key->src, sizeof(match.key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, sizeof(match.key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(match.key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(match.key->dst));
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+
+ flow_rule_match_tcp(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(match.mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(match.key->flags));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+
+ if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
+ if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
+ spec->flow_context.flow_source =
+ mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+ dev_put(dev);
+ }
+ }
+
+ return 0;
+}
+
+static void
+mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+{
+ if (entry->counter->is_shared &&
+ !refcount_dec_and_test(&entry->counter->refcount))
+ return;
+
+ mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
+ kfree(entry->counter);
+}
+
+static void
+mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry,
+ bool nat)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5_flow_attr *attr = zone_rule->attr;
+
+ ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
+
+ ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ kfree(attr);
+}
+
+static void
+mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+ atomic_dec(&ct_priv->debugfs.stats.offloaded);
+}
+
+static struct flow_action_entry *
+mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct flow_action_entry *act;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT_METADATA)
+ return act;
+ }
+
+ return NULL;
+}
+
+static int
+mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ u8 ct_state,
+ u32 mark,
+ u32 labels_id,
+ u8 zone_restore_id)
+{
+ enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ CTSTATE_TO_REG, ct_state);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ MARK_TO_REG, mark);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ LABELS_TO_REG, labels_id);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+
+ /* Make another copy of zone id in reg_b for
+ * NIC rx flows since we don't copy reg_c1 to
+ * reg_b upon miss.
+ */
+ if (ns != MLX5_FLOW_NAMESPACE_FDB) {
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
+static int
+mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
+ char *modact)
+{
+ u32 offset = act->mangle.offset, field;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct iphdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
+ else if (offset == offsetof(struct iphdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct ipv6hdr, saddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct tcphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
+ else if (offset == offsetof(struct tcphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct udphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
+ else if (offset == offsetof(struct udphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, offset, 0);
+ MLX5_SET(set_action_in, modact, field, field);
+ MLX5_SET(set_action_in, modact, data, act->mangle.val);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ struct flow_action_entry *act;
+ char *modact;
+ int err, i;
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE: {
+ modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
+
+ err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
+ if (err)
+ return err;
+
+ mod_acts->num_actions++;
+ }
+ break;
+
+ case FLOW_ACTION_CT_METADATA:
+ /* Handled earlier */
+ continue;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule,
+ struct mlx5e_mod_hdr_handle **mh,
+ u8 zone_restore_id, bool nat_table, bool has_nat)
+{
+ DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
+ DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
+ struct flow_action_entry *meta;
+ u16 ct_state = 0;
+ int err;
+
+ meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta)
+ return -EOPNOTSUPP;
+
+ err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
+ &attr->ct_attr.ct_labels_id);
+ if (err)
+ return -EOPNOTSUPP;
+ if (nat_table) {
+ if (has_nat) {
+ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
+ if (err)
+ goto err_mapping;
+ }
+
+ ct_state |= MLX5_CT_STATE_NAT_BIT;
+ }
+
+ ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
+ ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
+ err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
+ ct_state,
+ meta->ct_metadata.mark,
+ attr->ct_attr.ct_labels_id,
+ zone_restore_id);
+ if (err)
+ goto err_mapping;
+
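+	/* NAT mangle actions are per entry, so allocate a dedicated modify header;
+	 * otherwise attach to the shared mod_hdr table.
+	 */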
+ if (nat_table && has_nat) {
+ attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
+ mod_acts.num_actions,
+ mod_acts.actions);
+ if (IS_ERR(attr->modify_hdr)) {
+ err = PTR_ERR(attr->modify_hdr);
+ goto err_mapping;
+ }
+
+ *mh = NULL;
+ } else {
+ *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl,
+ ct_priv->ns_type,
+ &mod_acts);
+ if (IS_ERR(*mh)) {
+ err = PTR_ERR(*mh);
+ goto err_mapping;
+ }
+ attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
+ }
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ return 0;
+
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ return err;
+}
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh)
+{
+ if (mh)
+ mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
+ else
+ mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ bool nat, u8 zone_restore_id)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_flow_attr *attr;
+ int err;
+
+ zone_rule->nat = nat;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+
+ err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
+ &zone_rule->mh,
+ zone_restore_id,
+ nat,
+ mlx5_tc_ct_entry_has_nat(entry));
+ if (err) {
+ ct_dbg("Failed to create ct entry mod hdr");
+ goto err_mod_hdr;
+ }
+
+ attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = 0;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ if (entry->tuple.ip_proto == IPPROTO_TCP ||
+ entry->tuple.ip_proto == IPPROTO_UDP)
+ attr->outer_match_level = MLX5_MATCH_L4;
+ else
+ attr->outer_match_level = MLX5_MATCH_L3;
+ attr->counter = entry->counter->counter;
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->in_mdev = priv->mdev;
+
+ mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
+
+ zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
+ if (IS_ERR(zone_rule->rule)) {
+ err = PTR_ERR(zone_rule->rule);
+ ct_dbg("Failed to add ct entry rule, nat: %d", nat);
+ goto err_rule;
+ }
+
+ zone_rule->attr = attr;
+
+ kvfree(spec);
+ ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
+
+ return 0;
+
+err_rule:
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+err_mod_hdr:
+ kfree(attr);
+err_attr:
+ kvfree(spec);
+ return err;
+}
+
+static bool
+mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
+{
+ return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+}
+
+static struct mlx5_ct_entry *
+mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
+{
+ struct mlx5_ct_entry *entry;
+
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
+ tuples_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt)) {
+ return entry;
+ } else if (!entry) {
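+		/* No match in the plain tuples table; try the NAT tuples table. */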
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
+ tuple, tuples_nat_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt))
+ return entry;
+ }
+
+ return entry ? ERR_PTR(-EINVAL) : NULL;
+}
+
+static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+ tuples_ht_params);
+}
+
+static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ kfree(entry);
+}
+
+static void
+mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
+{
+ struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void
+__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
+ queue_work(entry->ct_priv->wq, &entry->work);
+}
+
+static struct mlx5_ct_counter *
+mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_ct_counter *counter;
+ int ret;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ counter->is_shared = false;
+ counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
+ if (IS_ERR(counter->counter)) {
+ ct_dbg("Failed to create counter for ct entry");
+ ret = PTR_ERR(counter->counter);
+ kfree(counter);
+ return ERR_PTR(ret);
+ }
+
+ return counter;
+}
+
+static struct mlx5_ct_counter *
+mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ struct mlx5_ct_tuple rev_tuple = entry->tuple;
+ struct mlx5_ct_counter *shared_counter;
+ struct mlx5_ct_entry *rev_entry;
+
+ /* get the reversed tuple */
+ swap(rev_tuple.port.src, rev_tuple.port.dst);
+
+ if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ __be32 tmp_addr = rev_tuple.ip.src_v4;
+
+ rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
+ rev_tuple.ip.dst_v4 = tmp_addr;
+ } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
+
+ rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
+ rev_tuple.ip.dst_v6 = tmp_addr;
+ } else {
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Use the same counter as the reverse direction */
+ spin_lock_bh(&ct_priv->ht_lock);
+ rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
+
+ if (IS_ERR(rev_entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ goto create_counter;
+ }
+
+ if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
+ ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
+ shared_counter = rev_entry->counter;
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(rev_entry);
+ return shared_counter;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+create_counter:
+
+ shared_counter = mlx5_tc_ct_counter_create(ct_priv);
+ if (IS_ERR(shared_counter))
+ return shared_counter;
+
+ shared_counter->is_shared = true;
+ refcount_set(&shared_counter->refcount, 1);
+ return shared_counter;
+}
+
+static int
+mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ u8 zone_restore_id)
+{
+ int err;
+
+ if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
+ entry->counter = mlx5_tc_ct_counter_create(ct_priv);
+ else
+ entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+
+ if (IS_ERR(entry->counter)) {
+ err = PTR_ERR(entry->counter);
+ return err;
+ }
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
+ zone_restore_id);
+ if (err)
+ goto err_orig;
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
+ zone_restore_id);
+ if (err)
+ goto err_nat;
+
+ atomic_inc(&ct_priv->debugfs.stats.offloaded);
+ return 0;
+
+err_nat:
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+err_orig:
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ struct flow_action_entry *meta_action;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+ int err;
+
+ meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta_action)
+ return -EOPNOTSUPP;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (entry && refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_put(entry);
+ return -EEXIST;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->tuple.zone = ft->zone;
+ entry->cookie = flow->cookie;
+ entry->restore_cookie = meta_action->ct_metadata.cookie;
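+	/* One reference for the hashtables and one for this function (dropped below). */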
+ refcount_set(&entry->refcnt, 2);
+ entry->ct_priv = ct_priv;
+
+ err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
+ if (err)
+ goto err_set;
+
+ memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
+ err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
+ if (err)
+ goto err_set;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+
+ err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
+ cts_ht_params);
+ if (err)
+ goto err_entries;
+
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+ if (err)
+ goto err_tuple;
+
+ if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ if (err)
+ goto err_tuple_nat;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
+ ft->zone_restore_id);
+ if (err)
+ goto err_rules;
+
+ set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+ mlx5_tc_ct_entry_put(entry); /* this function reference */
+
+ return 0;
+
+err_rules:
+ spin_lock_bh(&ct_priv->ht_lock);
+ if (mlx5_tc_ct_entry_has_nat(entry))
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node, tuples_nat_ht_params);
+err_tuple_nat:
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+err_tuple:
+ rhashtable_remove_fast(&ft->ct_entries_ht,
+ &entry->node,
+ cts_ht_params);
+err_entries:
+ spin_unlock_bh(&ct_priv->ht_lock);
+err_set:
+ kfree(entry);
+ if (err != -EEXIST)
+ netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(entry);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *f)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = f->cookie;
+ struct mlx5_ct_entry *entry;
+ u64 lastuse, packets, bytes;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+
+ mlx5_tc_ct_entry_put(entry);
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload *f = type_data;
+ struct mlx5_ct_ft *ft = cb_priv;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5_tc_ct_block_flow_offload_add(ft, f);
+ case FLOW_CLS_DESTROY:
+ return mlx5_tc_ct_block_flow_offload_del(ft, f);
+ case FLOW_CLS_STATS:
+ return mlx5_tc_ct_block_flow_offload_stats(ft, f);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static bool
+mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
+ u16 zone)
+{
+ struct flow_keys flow_keys;
+
+ skb_reset_network_header(skb);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
+
+ tuple->zone = zone;
+
+ if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
+ flow_keys.basic.ip_proto != IPPROTO_UDP &&
+ flow_keys.basic.ip_proto != IPPROTO_GRE)
+ return false;
+
+ if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
+ flow_keys.basic.ip_proto == IPPROTO_UDP) {
+ tuple->port.src = flow_keys.ports.src;
+ tuple->port.dst = flow_keys.ports.dst;
+ }
+ tuple->n_proto = flow_keys.basic.n_proto;
+ tuple->ip_proto = flow_keys.basic.ip_proto;
+
+ switch (flow_keys.basic.n_proto) {
+ case htons(ETH_P_IP):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
+ tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
+ break;
+
+ case htons(ETH_P_IPV6):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
+ tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
+ break;
+ default:
+ goto out;
+ }
+
+ return true;
+
+out:
+ return false;
+}
+
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+{
+ u32 ctstate = 0, ctstate_mask = 0;
+
+ mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
+ &ctstate, &ctstate_mask);
+
+ if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
+ return -EOPNOTSUPP;
+
+ ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+
+ return 0;
+}
+
+void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
+{
+ if (!priv || !ct_attr->ct_labels_id)
+ return;
+
+ mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
+}
+
+int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
+{
+ bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_dissector_key_ct *mask, *key;
+ u32 ctstate = 0, ctstate_mask = 0;
+ u16 ct_state_on, ct_state_off;
+ u16 ct_state, ct_state_mask;
+ struct flow_match_ct match;
+ u32 ct_labels[4];
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct matching isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_ct(rule, &match);
+
+ key = match.key;
+ mask = match.mask;
+
+ ct_state = key->ct_state;
+ ct_state_mask = mask->ct_state;
+
+ if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+ TCA_FLOWER_KEY_CT_FLAGS_NEW |
+ TCA_FLOWER_KEY_CT_FLAGS_REPLY |
+ TCA_FLOWER_KEY_CT_FLAGS_RELATED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "only ct_state trk, est, new and rpl are supported for offload");
+ return -EOPNOTSUPP;
+ }
+
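+	/* Split the masked key into bits that must be set and bits that must be clear. */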
+ ct_state_on = ct_state & ct_state_mask;
+ ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
+ trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
+ est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+ untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+
+ ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
+ ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
+
+ if (rel) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +rel isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (inv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +inv isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (new) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +new isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (mask->ct_zone)
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ key->ct_zone, MLX5_CT_ZONE_MASK);
+ if (ctstate_mask)
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+ if (mask->ct_mark)
+ mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
+ key->ct_mark, mask->ct_mark);
+ if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
+ mask->ct_labels[3]) {
+ ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
+ ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
+ ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
+ ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
+ if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
+ return -EOPNOTSUPP;
+ mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
+ MLX5_CT_LABELS_MASK);
+ }
+
+ return 0;
+}
+
+int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct action isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ attr->ct_attr.zone = act->ct.zone;
+ attr->ct_attr.ct_action = act->ct.action;
+ attr->ct_attr.nf_ft = act->ct.flow_table;
+
+ return 0;
+}
+
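+/* Populate a pre_ct/pre_ct_nat table with two rules: a rule that matches
+ * packets already tracked in this zone (+nat for the nat table) and sends
+ * them straight to the post action table, and a miss rule that sends
+ * everything else to the ct (or ct nat) table for lookup.
+ */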
+static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table *ft = pre_ct->ft;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 ctstate;
+ u16 zone;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
+ err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
+ ZONE_TO_REG, zone);
+ if (err) {
+ ct_dbg("Failed to set zone register mapping");
+ goto err_mapping;
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
+ pre_mod_acts.num_actions,
+ pre_mod_acts.actions);
+
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct->modify_hdr = mod_hdr;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.modify_hdr = mod_hdr;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ /* add flow rule */
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ zone, MLX5_CT_ZONE_MASK);
+ ctstate = MLX5_CT_STATE_TRK_BIT;
+ if (nat)
+ ctstate |= MLX5_CT_STATE_NAT_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
+
+ dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct flow rule zone %d", zone);
+ goto err_flow_rule;
+ }
+ pre_ct->flow_rule = rule;
+
+ /* add miss rule */
+ dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct miss rule zone %d", zone);
+ goto err_miss_rule;
+ }
+ pre_ct->miss_rule = rule;
+
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return 0;
+
+err_miss_rule:
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+err_flow_rule:
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return err;
+}
+
+static void
+tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+ mlx5_del_flow_rules(pre_ct->miss_rule);
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+}
+
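+/* Create one pre ct table: an unmanaged two-entry flow table where flow
+ * group 0 holds the zone/ctstate match rule and flow group 1 holds the
+ * catch-all miss rule added by tc_ct_pre_ct_add_rules() above.
+ */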
+static int
+mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *g;
+ u32 metadata_reg_c_2_mask;
+ u32 *flow_group_in;
+ void *misc;
+ int err;
+
+ ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ ct_dbg("Failed to get flow namespace");
+ return err;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to create pre ct table");
+ goto out_free;
+ }
+ pre_ct->ft = ft;
+
+ /* create flow group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria.misc_parameters_2);
+
+ metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
+ if (nat)
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
+ metadata_reg_c_2_mask);
+
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct group");
+ goto err_flow_grp;
+ }
+ pre_ct->flow_grp = g;
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct miss group");
+ goto err_miss_grp;
+ }
+ pre_ct->miss_grp = g;
+
+ err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
+ if (err)
+ goto err_add_rules;
+
+ kvfree(flow_group_in);
+ return 0;
+
+err_add_rules:
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+err_miss_grp:
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+err_flow_grp:
+ mlx5_destroy_flow_table(ft);
+out_free:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+ mlx5_destroy_flow_table(pre_ct->ft);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ int err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
+ if (err)
+ return err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
+ if (err)
+ goto err_pre_ct_nat;
+
+ return 0;
+
+err_pre_ct_nat:
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+}
+
+/* To avoid a false lock dependency warning, set the ct_entries_ht lock
+ * class to be different from the lock class of the ht being used when
+ * deleting the last flow from a group and then deleting the group: that
+ * path reaches del_sw_flow_group(), which calls rhashtable_destroy() on
+ * fg->ftes_hash and takes its ht->mutex, which is a different mutex from
+ * the ht->mutex here.
+ */
+static struct lock_class_key ct_entries_ht_lock_key;
+
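+/* Get or create the per-zone ct flowtable context: allocate a zone restore
+ * id, create the pre_ct/pre_ct_nat tables and register a callback for flow
+ * offload events coming from the nf flowtable of this zone.
+ */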
+static struct mlx5_ct_ft *
+mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
+ struct nf_flowtable *nf_ft)
+{
+ struct mlx5_ct_ft *ft;
+ int err;
+
+ ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
+ if (ft) {
+ refcount_inc(&ft->refcount);
+ return ft;
+ }
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
+ if (err)
+ goto err_mapping;
+
+ ft->zone = zone;
+ ft->nf_ft = nf_ft;
+ ft->ct_priv = ct_priv;
+ refcount_set(&ft->refcount, 1);
+
+ err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
+ if (err)
+ goto err_alloc_pre_ct;
+
+ err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
+ if (err)
+ goto err_init;
+
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
+
+ err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
+ zone_params);
+ if (err)
+ goto err_insert;
+
+ err = nf_flow_table_offload_add_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ if (err)
+ goto err_add_cb;
+
+ return ft;
+
+err_add_cb:
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+err_insert:
+ rhashtable_destroy(&ft->ct_entries_ht);
+err_init:
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+err_alloc_pre_ct:
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+err_mapping:
+ kfree(ft);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
+{
+ struct mlx5_ct_entry *entry = ptr;
+
+ mlx5_tc_ct_entry_put(entry);
+}
+
+static void
+mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+ if (!refcount_dec_and_test(&ft->refcount))
+ return;
+
+ flush_workqueue(ct_priv->wq);
+ nf_flow_table_offload_del_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+ rhashtable_free_and_destroy(&ft->ct_entries_ht,
+ mlx5_tc_ct_flush_ft_entry,
+ ct_priv);
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+ kfree(ft);
+}
+
+/* We translate the tc filter with CT action to the following HW model:
+ *
+ * +---------------------+
+ * + ft prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * | set chain miss mapping
+ * | set fte_id
+ * | set tunnel_id
+ * | do decap
+ * v
+ * +---------------------+
+ * + pre_ct/pre_ct_nat + if matches +-------------------------+
+ * + zone+nat match +---------------->+ post_act (see below) +
+ * +---------------------+ set zone +-------------------------+
+ * | set zone
+ * v
+ * +--------------------+
+ * + CT (nat or no nat) +
+ * + tuple + zone match +
+ * +--------------------+
+ * | set mark
+ * | set labels_id
+ * | set established
+ * | set zone_restore
+ * | do nat (if needed)
+ * v
+ * +--------------+
+ * + post_act + original filter actions
+ * + fte_id match +------------------------>
+ * +--------------+
+ */
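+/* Register layout used by the model above (see the *_to_reg_ct definitions
+ * in en/tc_ct.h): zone and ctstate share metadata reg_c_2 (low and high
+ * 16 bits respectively), ct_mark is written to reg_c_3, the ct_labels
+ * mapping id to reg_c_4 and the zone restore id to reg_c_1.
+ */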
+static struct mlx5_flow_handle *
+__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *orig_spec,
+ struct mlx5_flow_attr *attr)
+{
+ bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
+ u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_ct_flow *ct_flow;
+ int chain_mapping = 0, err;
+ struct mlx5_ct_ft *ft;
+
+ ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+ if (!ct_flow) {
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Register for CT established events */
+ ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
+ attr->ct_attr.nf_ft);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to register to ft callback");
+ goto err_ft;
+ }
+ ct_flow->ft = ft;
+
+	/* Base the flow attributes of both rules on the original rule's attributes */
+ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->pre_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre;
+ }
+
+ pre_ct_attr = ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, attr_sz);
+ pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
+
+ /* Modify the original rule's action to fwd and modify, leave decap */
+ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+	/* Write the chain miss tag for misses in the ct table, since we
+	 * don't go through all the prios of this chain the way a normal
+	 * tc rule miss would.
+	 */
+ err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
+ &chain_mapping);
+ if (err) {
+ ct_dbg("Failed to get chain register mapping for chain");
+ goto err_get_chain;
+ }
+ ct_flow->chain_mapping = chain_mapping;
+
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
+ CHAIN_TO_REG, chain_mapping);
+ if (err) {
+ ct_dbg("Failed to set chain register mapping");
+ goto err_mapping;
+ }
+
+ /* If original flow is decap, we do it before going into ct table
+ * so add a rewrite for the tunnel match_id.
+ */
+ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ attr->chain == 0) {
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
+ ct_priv->ns_type,
+ TUNNEL_TO_REG,
+ attr->tunnel_id);
+ if (err) {
+ ct_dbg("Failed to set tunnel register mapping");
+ goto err_mapping;
+ }
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
+ pre_mod_acts->num_actions,
+ pre_mod_acts->actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct_attr->modify_hdr = mod_hdr;
+
+	/* Change the original rule to point to the ct table */
+ pre_ct_attr->dest_chain = 0;
+ pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
+ ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
+ pre_ct_attr);
+ if (IS_ERR(ct_flow->pre_ct_rule)) {
+ err = PTR_ERR(ct_flow->pre_ct_rule);
+ ct_dbg("Failed to add pre ct rule");
+ goto err_insert_orig;
+ }
+
+ attr->ct_attr.ct_flow = ct_flow;
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+
+ return ct_flow->pre_ct_rule;
+
+err_insert_orig:
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+err_get_chain:
+ kfree(ct_flow->pre_ct_attr);
+err_alloc_pre:
+ mlx5_tc_ct_del_ft_cb(ct_priv, ft);
+err_ft:
+ kfree(ct_flow);
+ netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
+ return ERR_PTR(err);
+}
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ struct mlx5_flow_handle *rule;
+
+ if (!priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&priv->control_lock);
+ rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
+ mutex_unlock(&priv->control_lock);
+
+ return rule;
+}
+
+static void
+__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_flow *ct_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+ mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
+
+ kfree(ct_flow->pre_ct_attr);
+ kfree(ct_flow);
+}
+
+void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
+
+	/* We may also be called on an error path to clean up anything
+	 * created during parsing, but there is nothing to clean up for now.
+	 */
+ if (!ct_flow)
+ return;
+
+ mutex_lock(&priv->control_lock);
+ __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
+ mutex_unlock(&priv->control_lock);
+}
+
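+/* Pick the ct flow steering provider: default to dmfs, and use the smfs
+ * provider when FDB offload runs with SW steering mode.
+ */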
+static int
+mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
+ int err;
+
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
+ ct_dbg("Using SMFS ct flow steering provider");
+ fs_ops = mlx5_ct_fs_smfs_ops_get();
+ }
+
+ ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
+ if (!ct_priv->fs)
+ return -ENOMEM;
+
+ ct_priv->fs->netdev = ct_priv->netdev;
+ ct_priv->fs->dev = ct_priv->dev;
+ ct_priv->fs_ops = fs_ops;
+
+ err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
+ if (err)
+ goto err_init;
+
+ return 0;
+
+err_init:
+ kfree(ct_priv->fs);
+ return err;
+}
+
+static int
+mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
+ const char **err_msg)
+{
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
+	/* The vlan workaround must be avoided for multi-chain rules.
+	 * This is just a sanity check, as the pop vlan action should
+	 * be supported by any FW that supports ignore_flow_level.
+	 */
+
+ *err_msg = "firmware vlan actions support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
+ fdb_modify_header_fwd_to_table)) {
+	/* CT always writes to registers, which requires mod header
+	 * actions. Therefore, both mod header and forwarding to a table
+	 * (goto) must be supported.
+	 */
+
+ *err_msg = "firmware fwd and modify support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *err_msg = "register loopback isn't supported";
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ const char *err_msg = NULL;
+ int err = 0;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ /* cannot restore chain ID on HW miss */
+
+ err_msg = "tc skb extension missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+#endif
+ if (IS_ERR_OR_NULL(post_act)) {
+	/* Ignore_flow_level isn't supported by default for VFs, so post_act
+	 * won't be available either. Skip showing the error message in that
+	 * case.
+	 */
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
+ err_msg = "post action is missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
+
+out_err:
+ if (err && err_msg)
+ netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
+ return err;
+}
+
+static void
+mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+
+ ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
+ debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.offloaded);
+ debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.rx_dropped);
+}
+
+static void
+mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ debugfs_remove_recursive(ct_priv->debugfs.root);
+}
+
+#define INIT_ERR_PREFIX "tc ct offload init failed"
+
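+/* Set up ct offload for one namespace (FDB or NIC): zone and labels
+ * mappings, the ct and ct nat global tables, the zone/tuple hashtables,
+ * the ordered workqueue and the flow steering provider. Returns NULL on
+ * failure, in which case the other mlx5_tc_ct_* entry points detect the
+ * NULL priv and report ct offload as unavailable.
+ */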
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct mlx5_core_dev *dev;
+ u64 mapping_id;
+ int err;
+
+ dev = priv->mdev;
+ err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
+ if (err)
+ goto err_support;
+
+ ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
+ if (!ct_priv)
+ goto err_alloc;
+
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+ sizeof(u16), 0, true);
+ if (IS_ERR(ct_priv->zone_mapping)) {
+ err = PTR_ERR(ct_priv->zone_mapping);
+ goto err_mapping_zone;
+ }
+
+ ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+ sizeof(u32) * 4, 0, true);
+ if (IS_ERR(ct_priv->labels_mapping)) {
+ err = PTR_ERR(ct_priv->labels_mapping);
+ goto err_mapping_labels;
+ }
+
+ spin_lock_init(&ct_priv->ht_lock);
+ ct_priv->ns_type = ns_type;
+ ct_priv->chains = chains;
+ ct_priv->netdev = priv->netdev;
+ ct_priv->dev = priv->mdev;
+ ct_priv->mod_hdr_tbl = mod_hdr;
+ ct_priv->ct = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct)) {
+ err = PTR_ERR(ct_priv->ct);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_tbl;
+ }
+
+ ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct_nat)) {
+ err = PTR_ERR(ct_priv->ct_nat);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct nat table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_nat_tbl;
+ }
+
+ ct_priv->post_act = post_act;
+ mutex_init(&ct_priv->control_lock);
+ if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
+ goto err_ct_zone_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
+ goto err_ct_tuples_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
+ goto err_ct_tuples_nat_ht;
+
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ err = mlx5_tc_ct_fs_init(ct_priv);
+ if (err)
+ goto err_init_fs;
+
+ mlx5_ct_tc_create_dbgfs(ct_priv);
+ return ct_priv;
+
+err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+err_ct_tuples_nat_ht:
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+err_ct_tuples_ht:
+ rhashtable_destroy(&ct_priv->zone_ht);
+err_ct_zone_ht:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+err_ct_nat_tbl:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+err_ct_tbl:
+ mapping_destroy(ct_priv->labels_mapping);
+err_mapping_labels:
+ mapping_destroy(ct_priv->zone_mapping);
+err_mapping_zone:
+ kfree(ct_priv);
+err_alloc:
+err_support:
+
+ return NULL;
+}
+
+void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_fs_chains *chains;
+
+ if (!ct_priv)
+ return;
+
+ destroy_workqueue(ct_priv->wq);
+ mlx5_ct_tc_remove_dbgfs(ct_priv);
+ chains = ct_priv->chains;
+
+ ct_priv->fs_ops->destroy(ct_priv->fs);
+ kfree(ct_priv->fs);
+
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+ mapping_destroy(ct_priv->zone_mapping);
+ mapping_destroy(ct_priv->labels_mapping);
+
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+ rhashtable_destroy(&ct_priv->zone_ht);
+ mutex_destroy(&ct_priv->control_lock);
+ kfree(ct_priv);
+}
+
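+/* Restore conntrack state for an skb whose rule was offloaded but which
+ * reached software anyway: map zone_restore_id back to the zone, look the
+ * tuple up in the ct entries hashtable and re-attach the flow table restore
+ * cookie to the skb. Returns false (and counts a drop) if no entry matches.
+ */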
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id)
+{
+ struct mlx5_ct_tuple tuple = {};
+ struct mlx5_ct_entry *entry;
+ u16 zone;
+
+ if (!ct_priv || !zone_restore_id)
+ return true;
+
+ if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
+ goto out_inc_drop;
+
+ if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
+ goto out_inc_drop;
+
+ spin_lock(&ct_priv->ht_lock);
+
+ entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
+ if (!entry) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+
+ if (IS_ERR(entry)) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+ spin_unlock(&ct_priv->ht_lock);
+
+ tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+ __mlx5_tc_ct_entry_put(entry);
+
+ return true;
+
+out_inc_drop:
+ atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
+ return false;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
new file mode 100644
index 000000000..5bbd6b928
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_CT_H__
+#define __MLX5_EN_TC_CT_H__
+
+#include <net/pkt_cls.h>
+#include <linux/mlx5/fs.h>
+#include <net/tc_act/tc_ct.h>
+
+#include "en.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_mod_hdr_acts;
+struct mlx5_rep_uplink_priv;
+struct mlx5e_tc_flow;
+struct mlx5e_priv;
+
+struct mlx5_fs_chains;
+struct mlx5_tc_ct_priv;
+struct mlx5_ct_flow;
+
+struct nf_flowtable;
+
+struct mlx5_ct_attr {
+ u16 zone;
+ u16 ct_action;
+ struct mlx5_ct_flow *ct_flow;
+ struct nf_flowtable *nf_ft;
+ u32 ct_labels_id;
+};
+
+#define zone_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
+ .moffset = 0,\
+ .mlen = 16,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_2),\
+}
+
+#define ctstate_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
+ .moffset = 16,\
+ .mlen = 16,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_2),\
+}
+
+#define mark_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\
+ .moffset = 0,\
+ .mlen = 32,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_3),\
+}
+
+#define labels_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\
+ .moffset = 0,\
+ .mlen = 32,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_4),\
+}
+
+/* 8 LSB of metadata C5 are reserved for packet color */
+#define fteid_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\
+ .moffset = 8,\
+ .mlen = 24,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_5),\
+}
+
+#define zone_restore_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\
+ .moffset = 0,\
+ .mlen = ESW_ZONE_ID_BITS,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_1),\
+}
+
+#define nic_zone_restore_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\
+ .moffset = 16,\
+ .mlen = ESW_ZONE_ID_BITS,\
+}
+
+#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG)
+#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG)
+
+#if IS_ENABLED(CONFIG_MLX5_TC_CT)
+
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act);
+void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv);
+
+void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr);
+
+int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack);
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
+int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id);
+
+int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts);
+
+#else /* CONFIG_MLX5_TC_CT */
+
+static inline struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ return NULL;
+}
+
+static inline void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
+{
+}
+
+static inline void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {}
+
+static inline int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+{
+ return 0;
+}
+
+static inline int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+}
+
+static inline bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id)
+{
+ if (!zone_restore_id)
+ return true;
+
+ return false;
+}
+
+#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */
+#endif /* __MLX5_EN_TC_CT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
new file mode 100644
index 000000000..2e42d7c54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_PRIV_H__
+#define __MLX5_EN_TC_PRIV_H__
+
+#include "en_tc.h"
+#include "en/tc/act/act.h"
+
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
+
+#define MLX5E_TC_MAX_SPLITS 1
+
+
+enum {
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
+ MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
+ MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
+ MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11,
+};
+
+struct mlx5e_tc_flow_parse_attr {
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
+ struct mlx5_flow_spec spec;
+ struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
+ struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_act_parse_state parse_state;
+};
+
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
+
+/* Helper struct for accessing a struct containing list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the containing struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
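+/* For example, given a list_head threaded through encaps[i].list below
+ * (variable names are illustrative only):
+ *
+ *   struct encap_flow_item *efi =
+ *           container_of(item, struct encap_flow_item, list);
+ *   struct mlx5e_tc_flow *flow =
+ *           container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ */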
+struct encap_flow_item {
+ struct mlx5e_encap_entry *e; /* attached encap instance */
+ struct list_head list;
+ int index;
+};
+
+struct encap_route_flow_item {
+ struct mlx5e_route_entry *r; /* attached route instance */
+ int index;
+};
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ struct mlx5e_priv *priv;
+ u64 cookie;
+ unsigned long flags;
+ struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+ /* flows sharing the same reformat object - currently mpls decap */
+ struct list_head l3_to_l2_reformat;
+ struct mlx5e_decap_entry *decap_reformat;
+
+ /* flows sharing same route entry */
+ struct list_head decap_routes;
+ struct mlx5e_route_entry *decap_route;
+ struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS];
+
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
+ struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
+ struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
+	struct list_head unready; /* flows not ready to be offloaded (e.g.
+				   * due to a missing route)
+				   */
+ struct net_device *orig_dev; /* netdev adding flow first */
+ int tmp_entry_index;
+ struct list_head tmp_list; /* temporary flow list used by neigh update */
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+ struct completion init_done;
+ struct completion del_hw_done;
+ struct mlx5_flow_attr *attr;
+ struct list_head attrs;
+ u32 chain_mapping;
+};
+
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
+
+struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow);
+
+void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow);
+int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow);
+
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow);
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow);
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
+
+static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before setting bit. */
+ smp_mb__before_atomic();
+ set_bit(flag, &flow->flags);
+}
+
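+/* flow_flag_set(flow, OFFLOADED), for example, expands to
+ * __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_OFFLOADED). The same FLAG-name
+ * shorthand is used by the test/clear helpers below.
+ */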
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
+ unsigned long flag)
+{
+ /* test_and_set_bit() provides all necessary barriers */
+ return test_and_set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_test_and_set(flow, flag) \
+ __flow_flag_test_and_set(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before clearing bit. */
+ smp_mb__before_atomic();
+ clear_bit(flag, &flow->flags);
+}
+
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ bool ret = test_bit(flag, &flow->flags);
+
+ /* Read fields of flow structure only after checking flags. */
+ smp_mb__after_atomic();
+ return ret;
+}
+
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow);
+struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow);
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow);
+
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow);
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_get_int_port_priv(struct mlx5e_priv *priv);
+
+struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev);
+
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec);
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec);
+
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
new file mode 100644
index 000000000..83bb0811e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -0,0 +1,991 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/inet_ecn.h>
+#include <net/vxlan.h>
+#include <net/gre.h>
+#include <net/geneve.h>
+#include <net/bareudp.h>
+#include "en/tc_tun.h"
+#include "en/tc_priv.h"
+#include "en_tc.h"
+#include "rep/tc.h"
+#include "rep/neigh.h"
+#include "lag/lag.h"
+#include "lag/mp.h"
+
+struct mlx5e_tc_tun_route_attr {
+ struct net_device *out_dev;
+ struct net_device *route_dev;
+ union {
+ struct flowi4 fl4;
+ struct flowi6 fl6;
+ } fl;
+ struct neighbour *n;
+ u8 ttl;
+};
+
+#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}
+
+static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
+{
+ if (attr->n)
+ neigh_release(attr->n);
+ if (attr->route_dev)
+ dev_put(attr->route_dev);
+}
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return &vxlan_tunnel;
+ else if (netif_is_geneve(tunnel_dev))
+ return &geneve_tunnel;
+ else if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return &gre_tunnel;
+ else if (netif_is_bareudp(tunnel_dev))
+ return &mplsoudp_tunnel;
+ else
+ return NULL;
+}
+
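+/* Resolve the device the tunnel packet is routed through (route_dev) and
+ * the eswitch device the rule should forward to (out_dev). The uplink is
+ * used whenever the egress device is not on the same eswitch, is a LAG
+ * bond, a vlan device or an ovs master.
+ */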
+static int get_route_and_out_devs(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct net_device **route_dev,
+ struct net_device **out_dev)
+{
+ struct net_device *uplink_dev, *uplink_upper, *real_dev;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool dst_is_lag_dev;
+
+ real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+	/* mlx5_lag_is_sriov() is a blocking function that can't be called
+	 * while holding the rcu read lock, so take a reference on the
+	 * net_device for correctness' sake.
+	 */
+ if (uplink_upper)
+ dev_hold(uplink_upper);
+ rcu_read_unlock();
+
+ dst_is_lag_dev = (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ real_dev == uplink_upper &&
+ mlx5_lag_is_sriov(priv->mdev));
+ if (uplink_upper)
+ dev_put(uplink_upper);
+
+ /* if the egress device isn't on the same HW e-switch or
+ * it's a LAG device, use the uplink
+ */
+ *route_dev = dev;
+ if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
+ dst_is_lag_dev || is_vlan_dev(*route_dev) ||
+ netif_is_ovs_master(*route_dev))
+ *out_dev = uplink_dev;
+ else if (mlx5e_eswitch_rep(dev) &&
+ mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
+ *out_dev = *route_dev;
+ else
+ return -EOPNOTSUPP;
+
+ if (!(mlx5e_eswitch_rep(*out_dev) &&
+ mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
+ return -EOPNOTSUPP;
+
+ if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct mlx5e_tc_tun_route_attr *attr)
+{
+ struct net_device *route_dev;
+ struct net_device *out_dev;
+ struct neighbour *n;
+ struct rtable *rt;
+
+#if IS_ENABLED(CONFIG_INET)
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *uplink_dev;
+ int ret;
+
+ if (mlx5_lag_is_multipath(mdev)) {
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
+ } else {
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
+
+ if (tunnel && tunnel->get_remote_ifindex)
+ attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
+ }
+
+ rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ if (rt->rt_type != RTN_UNICAST) {
+ ret = -ENETUNREACH;
+ goto err_rt_release;
+ }
+
+ if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
+ ret = -ENETUNREACH;
+ goto err_rt_release;
+ }
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_rt_release;
+ dev_hold(route_dev);
+
+ if (!attr->ttl)
+ attr->ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err_dev_release;
+ }
+
+ ip_rt_put(rt);
+ attr->route_dev = route_dev;
+ attr->out_dev = out_dev;
+ attr->n = n;
+ return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_rt_release:
+ ip_rt_put(rt);
+ return ret;
+}
+
+static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
+{
+ mlx5e_tc_tun_route_attr_cleanup(attr);
+}
+
+static const char *mlx5e_netdev_kind(struct net_device *dev)
+{
+ if (dev->rtnl_link_ops)
+ return dev->rtnl_link_ops->kind;
+ else
+ return "unknown";
+}
+
+static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ if (!e->tunnel) {
+ pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
+ return -EOPNOTSUPP;
+ }
+
+ return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
+}
+
+static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
+ struct mlx5e_encap_entry *e,
+ u16 proto)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ char *ip;
+
+ ether_addr_copy(eth->h_dest, e->h_dest);
+ ether_addr_copy(eth->h_source, dev->dev_addr);
+ if (is_vlan_dev(dev)) {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)
+ ((char *)eth + ETH_HLEN);
+ ip = (char *)vlan + VLAN_HLEN;
+ eth->h_proto = vlan_dev_vlan_proto(dev);
+ vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
+ vlan->h_vlan_encapsulated_proto = htons(proto);
+ } else {
+ eth->h_proto = htons(proto);
+ ip = (char *)eth + ETH_HLEN;
+ }
+
+ return ip;
+}
+
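+/* Build the IPv4 encap header for encap entry e: resolve the route and
+ * neighbour of the tunnel destination, construct the ethernet + IPv4 +
+ * tunnel protocol headers into a buffer and, once the neighbour is valid,
+ * register that buffer with the device as a packet reformat action.
+ */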
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ int ipv4_encap_size;
+ char *encap_header;
+ struct iphdr *ip;
+ u8 nud_state;
+ int err;
+
+ /* add the IP fields */
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+	/* It's important to add the neigh to the hash table before checking
+	 * its validity state, so that if we get a notification when the
+	 * neigh changes its validity state we will find the relevant neigh
+	 * in the hash table.
+	 */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv4_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto destroy_neigh_entry;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+}
+
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ int ipv4_encap_size;
+ char *encap_header;
+ struct iphdr *ip;
+ u8 nud_state;
+ int err;
+
+ /* add the IP fields */
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv4_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv4_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct mlx5e_tc_tun_route_attr *attr)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
+ struct net_device *route_dev;
+ struct net_device *out_dev;
+ struct dst_entry *dst;
+ struct neighbour *n;
+ int ret;
+
+ if (tunnel && tunnel->get_remote_ifindex)
+ attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
+ NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ if (!attr->ttl)
+ attr->ttl = ip6_dst_hoplimit(dst);
+
+ ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_dst_release;
+
+ dev_hold(route_dev);
+ n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err_dev_release;
+ }
+
+ dst_release(dst);
+ attr->out_dev = out_dev;
+ attr->route_dev = route_dev;
+ attr->n = n;
+ return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_dst_release:
+ dst_release(dst);
+ return ret;
+}
+
+static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
+{
+ mlx5e_tc_tun_route_attr_cleanup(attr);
+}
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state;
+ int err;
+
+ attr.ttl = tun_key->ttl;
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+	/* It's important to add the neigh to the hash table before checking
+	 * its validity state, so that if we get a notification when the
+	 * neigh changes its validity state we will find the relevant neigh
+	 * in the hash table.
+	 */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv6_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto destroy_neigh_entry;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+}
+
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state;
+ int err;
+
+ attr.ttl = tun_key->ttl;
+
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv6_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv6_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+}
+#endif
+
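+/* Route lookup for the decap direction (note the swapped src/dst
+ * addresses): record either the VF vport behind the tunnel or the ovs
+ * internal port the tunnel traffic arrives through.
+ */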
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *flow_attr,
+ struct net_device *filter_dev)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_int_port *int_port;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ u16 vport_num;
+ int err = 0;
+
+ if (flow_attr->tun_ip_version == 4) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
+ attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
+ err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->tun_ip_version == 6) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
+ attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
+ err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
+ }
+#endif
+ else
+ return 0;
+
+ if (err)
+ return err;
+
+ if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops &&
+ mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) {
+ err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ esw_attr->rx_tun_attr->decap_vport = vport_num;
+ } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
+ int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
+ attr.route_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS);
+ if (IS_ERR(int_port)) {
+ err = PTR_ERR(int_port);
+ goto out;
+ }
+ esw_attr->int_port = int_port;
+ }
+
+out:
+ if (flow_attr->tun_ip_version == 4)
+ mlx5e_route_lookup_ipv4_put(&attr);
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->tun_ip_version == 6)
+ mlx5e_route_lookup_ipv6_put(&attr);
+#endif
+ return err;
+}
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);
+
+ if (tunnel && tunnel->can_offload(priv))
+ return true;
+ else
+ return false;
+}
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
+
+ if (!tunnel) {
+ e->reformat_type = -1;
+ return -EOPNOTSUPP;
+ }
+
+ return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ u8 *match_level)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ struct netlink_ext_ack *extack = f->common.extack;
+ int err = 0;
+
+ if (!tunnel) {
+ netdev_warn(priv->netdev,
+ "decapsulation offload is not supported for %s net device\n",
+ mlx5e_netdev_kind(filter_dev));
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ *match_level = tunnel->match_level;
+
+ if (tunnel->parse_udp_ports) {
+ err = tunnel->parse_udp_ports(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+ if (tunnel->parse_tunnel) {
+ err = tunnel->parse_tunnel(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_basic key_basic = {};
+ struct flow_dissector_key_basic mask_basic = {
+ .n_proto = htons(0xFFFF),
+ };
+ struct flow_match_basic match_basic = {
+ .key = &key_basic, .mask = &mask_basic,
+ };
+ struct flow_match_control match;
+ u16 addr_type;
+
+ flow_rule_match_enc_control(rule, &match);
+ addr_type = match.key->addr_type;
+
+ /* For tunnels, addr_type uses the same key IDs as for non-tunnel matches */
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_enc_ipv4_addrs(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->dst));
+
+ key_basic.n_proto = htons(ETH_P_IP);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
+ } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_enc_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ key_basic.n_proto = htons(ETH_P_IPV6);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_match_ip match;
+
+ flow_rule_match_enc_ip(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+ match.mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+ match.key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+ match.mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+ match.key->ttl);
+
+ if (match.mask->ttl &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB
+ (priv->mdev,
+ ft_field_support.outer_ipv4_ttl)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on TTL is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+
+ /* let software handle IP fragments */
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+
+ return 0;
+
+out:
+ return err;
+}
+
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ /* The full UDP destination port must be given */
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "UDP tunnel decap filter must include enc_dst_port condition");
+ netdev_warn(priv->netdev,
+ "UDP tunnel decap filter must include enc_dst_port condition\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ if (memchr_inv(&enc_ports.mask->dst, 0xff,
+ sizeof(enc_ports.mask->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "UDP tunnel decap filter must match enc_dst_port fully");
+ netdev_warn(priv->netdev,
+ "UDP tunnel decap filter must match enc_dst_port fully\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* match on UDP protocol and dst port number */
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(enc_ports.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(enc_ports.key->dst));
+
+ /* The UDP source port in the outer header is generated by the HW,
+ * so it is probably a bad idea to request matching on it.
+ * Nonetheless, it is allowed.
+ */
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(enc_ports.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(enc_ports.key->src));
+
+ return 0;
+}
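
The generic helper above only validates and programs the fully-masked enc_dst_port
match; any tunnel-specific port check is left to the implementation's own
parse_udp_ports callback. A minimal sketch of such a callback (hypothetical tunnel
name, with 4789 used purely as an example port, not a statement about any real
tunnel type):

static int sample_tunnel_parse_udp_ports(struct mlx5e_priv *priv,
					 struct mlx5_flow_spec *spec,
					 struct flow_cls_offload *f,
					 void *headers_c, void *headers_v)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_match_ports enc_ports;
	int err;

	/* generic checks: enc_dst_port present and fully masked */
	err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
	if (err)
		return err;

	/* tunnel-specific check: only offload the expected destination port */
	flow_rule_match_enc_ports(rule, &enc_ports);
	if (ntohs(enc_ports.key->dst) != 4789)
		return -EOPNOTSUPP;

	return 0;
}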
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
new file mode 100644
index 000000000..b38f693bb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUNNEL_H__
+#define __MLX5_EN_TC_TUNNEL_H__
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/fs.h>
+#include <net/pkt_cls.h>
+#include <linux/netlink.h>
+#include "en.h"
+#include "en_rep.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+ MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
+ MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+};
+
+struct mlx5e_encap_key {
+ const struct ip_tunnel_key *ip_tun_key;
+ struct mlx5e_tc_tunnel *tc_tunnel;
+};
+
+struct mlx5e_tc_tunnel {
+ int tunnel_type;
+ enum mlx5_flow_match_level match_level;
+
+ bool (*can_offload)(struct mlx5e_priv *priv);
+ int (*calc_hlen)(struct mlx5e_encap_entry *e);
+ int (*init_encap_attr)(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+ int (*generate_ip_tun_hdr)(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e);
+ int (*parse_udp_ports)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+ int (*parse_tunnel)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+ bool (*encap_info_equal)(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b);
+ int (*get_remote_ifindex)(struct net_device *mirred_dev);
+};
+
+extern struct mlx5e_tc_tunnel vxlan_tunnel;
+extern struct mlx5e_tc_tunnel geneve_tunnel;
+extern struct mlx5e_tc_tunnel gre_tunnel;
+extern struct mlx5e_tc_tunnel mplsoudp_tunnel;
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+#else
+static inline int
+mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
+static inline int
+mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
+#endif
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct net_device *filter_dev);
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev);
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ u8 *match_level);
+
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+
+bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b);
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif //__MLX5_EN_TC_TUNNEL_H__
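
For orientation, a minimal sketch of how a tunnel implementation might fill in
struct mlx5e_tc_tunnel declared above. The names, the fixed header length and the
match level are illustrative assumptions, and the remaining callbacks
(init_encap_attr, generate_ip_tun_hdr, parse_tunnel, get_remote_ifindex) are
omitted:

static bool sample_tunnel_can_offload(struct mlx5e_priv *priv)
{
	/* a real implementation would test a device reformat capability here */
	return true;
}

static int sample_tunnel_calc_hlen(struct mlx5e_encap_entry *e)
{
	return 8;	/* fixed outer tunnel header size for this sketch */
}

static struct mlx5e_tc_tunnel sample_tunnel = {
	.tunnel_type      = MLX5E_TC_TUNNEL_TYPE_UNKNOWN,	/* placeholder */
	.match_level      = MLX5_MATCH_L4,
	.can_offload      = sample_tunnel_can_offload,
	.calc_hlen        = sample_tunnel_calc_hlen,
	.parse_udp_ports  = mlx5e_tc_tun_parse_udp_ports,
	.encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
};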
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
new file mode 100644
index 000000000..907ad6ffe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -0,0 +1,1766 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <net/fib_notifier.h>
+#include <net/nexthop.h>
+#include "tc_tun_encap.h"
+#include "en_tc.h"
+#include "tc_tun.h"
+#include "rep/tc.h"
+#include "diag/en_tc_tracepoint.h"
+
+enum {
+ MLX5E_ROUTE_ENTRY_VALID = BIT(0),
+};
+
+static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ int out_index)
+{
+ struct net_device *route_dev;
+ int err = 0;
+
+ route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
+
+ if (!route_dev || !netif_is_ovs_master(route_dev) ||
+ attr->parse_attr->filter_dev == e->out_dev)
+ goto out;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, out_index);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+
+ return err;
+}
+
+struct mlx5e_route_key {
+ int ip_version;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } endpoint_ip;
+};
+
+struct mlx5e_route_entry {
+ struct mlx5e_route_key key;
+ struct list_head encap_entries;
+ struct list_head decap_flows;
+ u32 flags;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ int tunnel_dev_index;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_tc_tun_encap {
+ struct mlx5e_priv *priv;
+ struct notifier_block fib_nb;
+ spinlock_t route_lock; /* protects route_tbl */
+ unsigned long route_tbl_last_update;
+ DECLARE_HASHTABLE(route_tbl, 8);
+};
+
+static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
+{
+ return r->flags & MLX5E_ROUTE_ENTRY_VALID;
+}
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_rx_tun_attr *tun_attr;
+ void *daddr, *saddr;
+ u8 ip_version;
+
+ tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
+ if (!tun_attr)
+ return -ENOMEM;
+
+ esw_attr->rx_tun_attr = tun_attr;
+ ip_version = mlx5e_tc_get_ip_version(spec, true);
+
+ if (ip_version == 4) {
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ tun_attr->dst_ip.v4 = *(__be32 *)daddr;
+ tun_attr->src_ip.v4 = *(__be32 *)saddr;
+ if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
+ return 0;
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (ip_version == 6) {
+ int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
+ memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
+ if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
+ ipv6_addr_any(&tun_attr->src_ip.v6))
+ return 0;
+ }
+#endif
+ /* Only set the flag if both src and dst ip addresses exist. They are
+ * required to establish routing.
+ */
+ flow_flag_set(flow, TUN_RX);
+ flow->attr->tun_ip_version = ip_version;
+ return 0;
+}
+
+static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
+{
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ /* A flow can be associated with multiple encap entries.
+ * Before offloading the flow, verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+
+ return all_flow_encaps_valid;
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
+ return;
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = e->encap_size;
+ reformat_params.data = e->encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+ PTR_ERR(e->pkt_reformat));
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
+ continue;
+
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+
+ /* Do not offload flows with unresolved neighbors */
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ continue;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ continue;
+ }
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
+ if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ flow->rule[0] = rule;
+ /* was unset when slow path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+
+ /* Clear pkt_reformat before checking the slow path flag: on a later
+ * iteration the same flow may already have the slow path flag set,
+ * but its pkt_reformat still needs to be cleared.
+ */
+ if (flow_flag_test(flow, SLOW))
+ continue;
+
+ /* update from encap rule to slow path rule */
+ spec = &flow->attr->parse_attr->spec;
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ flow->rule[0] = rule;
+ /* was unset when fast path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+
+ /* the encap entry was valid, so a pkt_reformat was allocated; release it */
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
+}
+
+static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
+ struct list_head *flow_list,
+ int index)
+{
+ if (IS_ERR(mlx5e_flow_get(flow))) {
+ /* Flow is being deleted concurrently. Wait for it to be
+ * unoffloaded from hardware, otherwise deleting encap will
+ * fail.
+ */
+ wait_for_completion(&flow->del_hw_done);
+ return;
+ }
+ wait_for_completion(&flow->init_done);
+
+ flow->tmp_entry_index = index;
+ list_add(&flow->tmp_list, flow_list);
+}
+
+/* Takes a reference to all flows attached to the encap entry and adds them to
+ * flow_list using the 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
+{
+ struct encap_flow_item *efi;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ mlx5e_take_tmp_flow(flow, flow_list, efi->index);
+ }
+}
+
+/* Takes a reference to all flows attached to the route entry and adds them to
+ * flow_list using the 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
+ struct list_head *flow_list)
+{
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, &r->decap_flows, decap_routes)
+ mlx5e_take_tmp_flow(flow, flow_list, 0);
+}
+
+typedef bool (match_cb)(struct mlx5e_encap_entry *);
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e,
+ match_cb match)
+{
+ struct mlx5e_encap_entry *next = NULL;
+
+retry:
+ rcu_read_lock();
+
+ /* find encap with non-zero reference counter value */
+ for (next = e ?
+ list_next_or_null_rcu(&nhe->encap_list,
+ &e->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list) :
+ list_first_or_null_rcu(&nhe->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list);
+ next;
+ next = list_next_or_null_rcu(&nhe->encap_list,
+ &next->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list))
+ if (mlx5e_encap_take(next))
+ break;
+
+ rcu_read_unlock();
+
+ /* release starting encap */
+ if (e)
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
+ if (!next)
+ return next;
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+ if (!match(next)) {
+ e = next;
+ goto retry;
+ }
+
+ return next;
+}
+
+static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
+{
+ return e->flags & MLX5_ENCAP_ENTRY_VALID;
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
+}
+
+static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
+{
+ return e->compl_result >= 0;
+}
+
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_encap_entry *e = NULL;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = ipv6_stub->nd_tbl;
+#endif
+ else
+ return;
+
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
+ * next one.
+ */
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
+ struct encap_flow_item *efi, *tmp;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+ list_add(&flow->tmp_list, &flow_list);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ counter = mlx5e_tc_get_counter(flow);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ if (neigh_used) {
+ /* release current encap before breaking the loop */
+ mlx5e_encap_put(priv, e);
+ break;
+ }
+ }
+
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ WARN_ON(!list_empty(&e->flows));
+
+ if (e->compl_result > 0) {
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ }
+
+ kfree(e->tun_info);
+ kfree(e->encap_header);
+ kfree_rcu(e, rcu);
+}
+
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index);
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index)
+{
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ if (attr->esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5e_detach_encap_route(priv, flow, out_index);
+
+ /* flow wasn't fully initialized */
+ if (!e)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->encaps[out_index].list);
+ flow->encaps[out_index].e = NULL;
+ if (!refcount_dec_and_test(&e->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b)
+{
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
+ a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
+}
+
+static int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
+static int hash_encap_info(struct mlx5e_encap_key *key)
+{
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tc_tunnel->tunnel_type);
+}
+
+static int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
+
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_key e_key;
+ struct mlx5e_encap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
+ if (e->tunnel->encap_info_equal(&e_key, key) &&
+ mlx5e_encap_take(e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < out_index; i++) {
+ if (flow->encaps[i].e != e)
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ attr->dest_chain = 0;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
+ vport_num);
+ err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err >= 0) {
+ esw_attr->dests[out_index].src_port_rewrite_act_id = err;
+ err = 0;
+ }
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ int act_id = attr->dests[out_index].src_port_rewrite_act_id;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
+ vport_num);
+ mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ unsigned int ret;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ spin_lock_bh(&encap->route_lock);
+ ret = encap->route_tbl_last_update;
+ spin_unlock_bh(&encap->route_lock);
+ return ret;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ const struct ip_tunnel_info *tun_info;
+ const struct mlx5e_mpls_info *mpls_info;
+ unsigned long tbl_time_before = 0;
+ struct mlx5e_encap_entry *e;
+ struct mlx5e_encap_key key;
+ bool entry_created = false;
+ unsigned short family;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ mpls_info = &parse_attr->mpls_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+ key.ip_tun_key = &tun_info->key;
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+ if (!key.tc_tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(&key);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ e = mlx5e_encap_get(priv, &key, hash_key);
+
+ /* must verify whether the encap entry is valid */
+ if (e) {
+ /* Check that entry was not already attached to this flow */
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ wait_for_completion(&e->res_ready);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (e->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
+ goto attach_flow;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ refcount_set(&e->refcnt, 1);
+ init_completion(&e->res_ready);
+ entry_created = true;
+ INIT_LIST_HEAD(&e->route_list);
+
+ tun_info = mlx5e_dup_tun_info(tun_info);
+ if (!tun_info) {
+ err = -ENOMEM;
+ goto out_err_init;
+ }
+ e->tun_info = tun_info;
+ memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err_init;
+
+ INIT_LIST_HEAD(&e->flows);
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ if (family == AF_INET)
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ complete_all(&e->res_ready);
+ if (err) {
+ e->compl_result = err;
+ goto out_err;
+ }
+ e->compl_result = 1;
+
+attach_flow:
+ err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
+ tbl_time_before, out_index);
+ if (err)
+ goto out_err;
+
+ err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
+ if (err == -EOPNOTSUPP) {
+ /* If device doesn't support int port offload,
+ * redirect to uplink vport.
+ */
+ mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
+ err = 0;
+ } else if (err) {
+ goto out_err;
+ }
+
+ flow->encaps[out_index].e = e;
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ } else {
+ flow_flag_set(flow, SLOW);
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (e)
+ mlx5e_encap_put(priv, e);
+ return err;
+
+out_err_init:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ kfree(tun_info);
+ kfree(e);
+ return err;
+}
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err = 0;
+
+ if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ reformat_params.size = sizeof(attr->eth);
+ reformat_params.data = &attr->eth;
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}
+
+static int cmp_route_info(struct mlx5e_route_key *a,
+ struct mlx5e_route_key *b)
+{
+ if (a->ip_version == 4 && b->ip_version == 4)
+ return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
+ sizeof(a->endpoint_ip.v4));
+ else if (a->ip_version == 6 && b->ip_version == 6)
+ return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
+ sizeof(a->endpoint_ip.v6));
+ return 1;
+}
+
+static u32 hash_route_info(struct mlx5e_route_key *key)
+{
+ if (key->ip_version == 4)
+ return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
+ return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
+}
+
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r)
+{
+ WARN_ON(!list_empty(&r->decap_flows));
+ WARN_ON(!list_empty(&r->encap_entries));
+
+ kfree_rcu(r, rcu);
+}
+
+static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
+ if (!refcount_dec_and_test(&r->refcnt))
+ return;
+ hash_del_rcu(&r->hlist);
+ mlx5e_route_dealloc(priv, r);
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_route_key r_key;
+ struct mlx5e_route_entry *r;
+
+ hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
+ r_key = r->key;
+ if (!cmp_route_info(&r_key, key) &&
+ refcount_inc_not_zero(&r->refcnt))
+ return r;
+ }
+ return NULL;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get_create(struct mlx5e_priv *priv,
+ struct mlx5e_route_key *key,
+ int tunnel_dev_index,
+ unsigned long *route_tbl_change_time)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ struct mlx5e_route_entry *r;
+ u32 hash_key;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ hash_key = hash_route_info(key);
+ spin_lock_bh(&encap->route_lock);
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+ if (r) {
+ if (!mlx5e_route_entry_valid(r)) {
+ mlx5e_route_put_locked(priv, r);
+ return ERR_PTR(-EINVAL);
+ }
+ return r;
+ }
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ r->key = *key;
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+ r->tunnel_dev_index = tunnel_dev_index;
+ refcount_set(&r->refcnt, 1);
+ INIT_LIST_HEAD(&r->decap_flows);
+ INIT_LIST_HEAD(&r->encap_entries);
+
+ spin_lock_bh(&encap->route_lock);
+ *route_tbl_change_time = encap->route_tbl_last_update;
+ hash_add(encap->route_tbl, &r->hlist, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
+{
+ u32 hash_key = hash_route_info(key);
+ struct mlx5e_route_entry *r;
+
+ spin_lock_bh(&encap->route_lock);
+ encap->route_tbl_last_update = jiffies;
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+struct mlx5e_tc_fib_event_data {
+ struct work_struct work;
+ unsigned long event;
+ struct mlx5e_route_entry *r;
+ struct net_device *ul_dev;
+};
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work);
+static struct mlx5e_tc_fib_event_data *
+mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+
+ fib_work = kzalloc(sizeof(*fib_work), flags);
+ if (WARN_ON(!fib_work))
+ return NULL;
+
+ INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
+ fib_work->event = event;
+ fib_work->ul_dev = ul_dev;
+
+ return fib_work;
+}
+
+static int
+mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ unsigned long event)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct net_device *ul_dev;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ ul_dev = uplink_rpriv->netdev;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
+ if (!fib_work)
+ return -ENOMEM;
+
+ dev_hold(ul_dev);
+ refcount_inc(&r->refcnt);
+ fib_work->r = r;
+ queue_work(priv->wq, &fib_work->work);
+
+ return 0;
+}
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_before, tbl_time_after;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (!esw_attr->rx_tun_attr)
+ goto out;
+
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ tbl_time_after = tbl_time_before;
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
+ if (err || !esw_attr->rx_tun_attr->decap_vport)
+ goto out;
+
+ key.ip_version = attr->tun_ip_version;
+ if (key.ip_version == 4)
+ key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
+ else
+ key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
+ &tbl_time_after);
+ if (IS_ERR(r)) {
+ err = PTR_ERR(r);
+ goto out;
+ }
+ /* Routing changed concurrently. The FIB event handler might have missed
+ * the new entry, so schedule an update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ goto out;
+ }
+ }
+
+ flow->decap_route = r;
+ list_add(&flow->decap_routes, &r->decap_flows);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return 0;
+
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return err;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_after = tbl_time_before;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ unsigned short family;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+
+ if (family == AF_INET) {
+ key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
+ key.ip_version = 4;
+ } else if (family == AF_INET6) {
+ key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
+ key.ip_version = 6;
+ }
+
+ err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+ e->route_dev_ifindex, out_index);
+ if (err || !(esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+ return err;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
+ &tbl_time_after);
+ if (IS_ERR(r))
+ return PTR_ERR(r);
+ /* Routing changed concurrently. The FIB event handler might have missed
+ * the new entry, so schedule an update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ return err;
+ }
+ }
+
+ flow->encap_routes[out_index].r = r;
+ if (new_encap_entry)
+ list_add(&e->route_list, &r->encap_entries);
+ flow->encap_routes[out_index].index = out_index;
+ return 0;
+}
+
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_entry *r = flow->decap_route;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->decap_routes);
+ flow->decap_route = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index)
+{
+ struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e, *tmp;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ flow->encap_routes[out_index].r = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
+ list_del_init(&e->route_list);
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_attr *attr;
+
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+
+ if (flow_flag_test(flow, SLOW))
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ attr->modify_hdr = NULL;
+
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
+ }
+}
+
+static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
+ struct net_device *tunnel_dev,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
+ mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
+ mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
+ e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+
+ err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
+ e->out_dev, e->route_dev_ifindex,
+ flow->tmp_entry_index);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
+ continue;
+ }
+
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
+ err);
+ continue;
+ }
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ goto offload_to_slow_path;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ goto offload_to_slow_path;
+ }
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
+ if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ } else {
+offload_to_slow_path:
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ }
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ struct mlx5e_encap_entry *e;
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ list_for_each_entry(e, &r->encap_entries, route_list) {
+ LIST_HEAD(encap_flows);
+
+ mlx5e_take_all_encap_flows(e, &encap_flows);
+ if (list_empty(&encap_flows))
+ continue;
+
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_invalidate_encap(priv, e, &encap_flows);
+
+ if (!replace) {
+ list_splice(&encap_flows, flow_list);
+ continue;
+ }
+
+ mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
+ list_splice(&encap_flows, flow_list);
+ }
+
+ return 0;
+}
+
+static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, flow_list, tmp_list)
+ if (mlx5e_is_offloaded_flow(flow))
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+}
+
+static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
+ struct list_head *decap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, decap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ parse_attr = attr->parse_attr;
+ spec = &parse_attr->spec;
+ err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
+ err);
+ continue;
+ }
+
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ flow_flag_set(flow, OFFLOADED);
+ }
+ }
+}
+
+static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ LIST_HEAD(decap_flows);
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ mlx5e_take_all_route_decap_flows(r, &decap_flows);
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_unoffload_flow_list(priv, &decap_flows);
+ if (replace)
+ mlx5e_reoffload_decap(priv, &decap_flows);
+
+ list_splice(&decap_flows, flow_list);
+
+ return 0;
+}
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work)
+{
+ struct mlx5e_tc_fib_event_data *event_data =
+ container_of(work, struct mlx5e_tc_fib_event_data, work);
+ struct net_device *ul_dev = event_data->ul_dev;
+ struct mlx5e_priv *priv = netdev_priv(ul_dev);
+ struct mlx5e_route_entry *r = event_data->r;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+ bool replace;
+ int err;
+
+ /* sync with concurrent neigh updates */
+ rtnl_lock();
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
+
+ if (!mlx5e_route_entry_valid(r) && !replace)
+ goto out;
+
+ err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
+ err);
+
+ err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
+ err);
+
+ if (replace)
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ rtnl_unlock();
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ mlx5e_route_put(priv, event_data->r);
+ dev_put(event_data->ul_dev);
+ kfree(event_data);
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib_entry_notifier_info, info);
+ if (fen_info->fi->nh)
+ return NULL;
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->dst_len != 32)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ key.endpoint_ip.v4 = htonl(fen_info->dst);
+ key.ip_version = 4;
+
+ /* Can't fail after this point because releasing the reference to r
+ * requires obtaining a sleeping mutex, which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib6_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib6_entry_notifier_info, info);
+ fib_dev = fib6_info_nh_dev(fen_info->rt);
+ if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->rt->fib6_dst.plen != 128)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
+ sizeof(fen_info->rt->fib6_dst.addr));
+ key.ip_version = 6;
+
+	/* Can't fail after this point because releasing the reference to r
+	 * requires obtaining a sleeping mutex, which we can't do in atomic
+	 * context.
+	 */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct fib_notifier_info *info = ptr;
+ struct mlx5e_tc_tun_encap *encap;
+ struct net_device *ul_dev;
+ struct mlx5e_priv *priv;
+
+ encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
+ priv = encap->priv;
+ ul_dev = priv->netdev;
+ priv = netdev_priv(ul_dev);
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ if (info->family == AF_INET)
+ fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
+ else if (info->family == AF_INET6)
+ fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
+ else
+ return NOTIFY_DONE;
+
+ if (!IS_ERR_OR_NULL(fib_work)) {
+ queue_work(priv->wq, &fib_work->work);
+ } else if (IS_ERR(fib_work)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
+ mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
+ PTR_ERR(fib_work));
+ }
+
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_DONE;
+}
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_tun_encap *encap;
+ int err;
+
+ encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
+ if (!encap)
+ return ERR_PTR(-ENOMEM);
+
+ encap->priv = priv;
+ encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
+ spin_lock_init(&encap->route_lock);
+ hash_init(encap->route_tbl);
+ err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
+ NULL, NULL);
+ if (err) {
+ kvfree(encap);
+ return ERR_PTR(err);
+ }
+
+ return encap;
+}
+
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
+{
+ if (!encap)
+ return;
+
+ unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
+	flush_workqueue(encap->priv->wq); /* flush fib event work items */
+ kvfree(encap);
+}
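+
+/* Illustrative usage sketch (not upstream code): the owner of the encap
+ * context is expected to pair the two functions above roughly as follows.
+ *
+ *	encap = mlx5e_tc_tun_init(priv);
+ *	if (IS_ERR(encap))
+ *		return PTR_ERR(encap);
+ *	...
+ *	mlx5e_tc_tun_cleanup(encap);
+ *
+ * mlx5e_tc_tun_cleanup() tolerates a NULL pointer, so teardown paths may
+ * call it unconditionally.
+ */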
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
new file mode 100644
index 000000000..8ad273dde
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUN_ENCAP_H__
+#define __MLX5_EN_TC_TUN_ENCAP_H__
+
+#include "tc_priv.h"
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev);
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv);
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap);
+
+#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
new file mode 100644
index 000000000..054d80c4e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/geneve.h>
+#include "lib/geneve.h"
+#include "en/tc_tun.h"
+
+#define MLX5E_GENEVE_VER 0
+
+static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv)
+{
+ return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE);
+}
+
+static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) +
+ sizeof(struct genevehdr) +
+ e->tun_info->options_len;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+	/* Currently we support only the default GENEVE
+	 * port, so the UDP dst port must match.
+	 */
+ if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a GENEVE port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a GENEVE port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_geneve(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &geneve_tunnel;
+
+ /* Reformat type for GENEVE encap is similar to VXLAN:
+ * in both cases the HW adds in the same place a
+ * defined encapsulation header that the SW provides.
+ */
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+ vni[0] = (__force __u8)(tun_id >> 16);
+ vni[1] = (__force __u8)(tun_id >> 8);
+ vni[2] = (__force __u8)tun_id;
+#else
+ vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+ vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+ vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
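+
+/* Endian-neutral equivalent of the helper above, shown for illustration only
+ * (not upstream code): the GENEVE VNI is the low 24 bits of the tunnel ID,
+ * written most-significant byte first.
+ *
+ *	static void tunnel_id_to_vni_portable(__be64 tun_id, __u8 *vni)
+ *	{
+ *		u64 id = be64_to_cpu(tun_id);
+ *
+ *		vni[0] = (id >> 16) & 0xff;
+ *		vni[1] = (id >> 8) & 0xff;
+ *		vni[2] = id & 0xff;
+ *	}
+ */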
+
+static int mlx5e_gen_ip_tunnel_header_geneve(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_info *tun_info = e->tun_info;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct genevehdr *geneveh;
+
+ geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr));
+
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_info->key.tp_dst;
+
+ memset(geneveh, 0, sizeof(*geneveh));
+ geneveh->ver = MLX5E_GENEVE_VER;
+ geneveh->opt_len = tun_info->options_len / 4;
+ geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM);
+ geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT);
+ mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni);
+ geneveh->proto_type = htons(ETH_P_TEB);
+
+ if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (!geneveh->opt_len)
+ return -EOPNOTSUPP;
+ ip_tunnel_info_opts_get(geneveh->options, tun_info);
+ }
+
+ return 0;
+}
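+
+/* A note on the helper above, for illustration: geneveh->opt_len is expressed
+ * in 4-byte words, so a single 8-byte TLV option (4-byte option header plus
+ * 4 bytes of data) gives tun_info->options_len == 8 and opt_len == 2.
+ */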
+
+static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid));
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len);
+ u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ void *misc_c, *misc_v, *misc_3_c, *misc_3_v;
+ struct geneve_opt *option_key, *option_mask;
+ __be32 opt_data_key = 0, opt_data_mask = 0;
+ struct flow_match_enc_opts enc_opts;
+ int res = 0;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3);
+ misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
+ return 0;
+
+ flow_rule_match_enc_opts(rule, &enc_opts);
+
+ if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.geneve_tlv_option_0_data)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* make sure that we're talking about GENEVE options */
+
+ if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: option type is not GENEVE");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: option type is not GENEVE\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_opts.mask->len &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_opt_len)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options len is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+	/* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it
+	 * doesn't include the TLV option header. 'geneve_opt_len' is the total
+	 * length of all the options, including their headers, also in
+	 * multiples of 4 bytes. The length that comes from the dissector is in
+	 * bytes.
+	 */
+
+ if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported options len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported options len (len=%d)\n",
+ enc_opts.key->len);
+ return -EOPNOTSUPP;
+ }
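+	/* Worked example (hypothetical capability values, for illustration):
+	 * with max_geneve_tlv_option_data_len == 32 (4-byte words of option
+	 * data per option) and max_geneve_tlv_options == 8, the check above
+	 * allows up to (32 + 1) * 8 = 264 words, i.e. 1056 bytes of options
+	 * including the per-option TLV headers.
+	 */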
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4);
+
+ /* we support matching on one option only, so just get it */
+ option_key = (struct geneve_opt *)&enc_opts.key->data[0];
+ option_mask = (struct geneve_opt *)&enc_opts.mask->data[0];
+
+ if (option_mask->opt_class == 0 && option_mask->type == 0 &&
+ !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4))
+ return 0;
+
+ if (option_key->length > max_tlv_option_data_len) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported option len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n",
+ option_key->length, option_mask->length);
+ return -EOPNOTSUPP;
+ }
+
+	/* data can't be all 0 - fail to offload such a rule */
+ if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: can't match on 0 data field");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: can't match on 0 data field\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* add new GENEVE TLV options object */
+ res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key);
+ if (res) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: failed creating TLV opt object");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n",
+ be16_to_cpu(option_key->opt_class),
+ option_key->type, option_key->length);
+ return res;
+ }
+
+	/* In general, after creating the object, we would need to query it
+	 * in order to check which option data to set in misc3.
+	 * But we support only geneve_tlv_option_0_data, so there is no
+	 * point in querying at this stage.
+	 */
+
+ memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4);
+ memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4);
+ MLX5_SET(fte_match_set_misc3, misc_3_v,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_key));
+ MLX5_SET(fte_match_set_misc3, misc_3_c,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.geneve_tlv_option_0_exist)) {
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct netlink_ext_ack *extack = f->common.extack;
+
+ /* match on OAM - packets with OAM bit on should NOT be offloaded */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0);
+
+ /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */
+
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_protocol_type)) {
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_parse_geneve_options(priv, spec, f);
+}
+
+static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b)
+{
+ struct ip_tunnel_info *a_info;
+ struct ip_tunnel_info *b_info;
+ bool a_has_opts, b_has_opts;
+
+ if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
+ return false;
+
+ a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
+ b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
+
+ /* keys are equal when both don't have any options attached */
+ if (!a_has_opts && !b_has_opts)
+ return true;
+
+ if (a_has_opts != b_has_opts)
+ return false;
+
+	/* GENEVE options are stored in memory directly after the ip_tunnel_info struct */
+ a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
+ b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
+
+ return a_info->options_len == b_info->options_len &&
+ memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0;
+}
+
+struct mlx5e_tc_tunnel geneve_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_geneve,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve,
+ .parse_tunnel = mlx5e_tc_tun_parse_geneve,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_geneve,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
new file mode 100644
index 000000000..ada14f057
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/gre.h>
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e)
+{
+ return gre_calc_hlen(e->tun_info->key.tun_flags);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &gre_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_gretap(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ int hdr_len;
+
+ *ip_proto = IPPROTO_GRE;
+
+ /* the HW does not calculate GRE csum or sequences */
+ if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+ return -EOPNOTSUPP;
+
+ greh->protocol = htons(ETH_P_TEB);
+
+ /* GRE key */
+ hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e);
+ greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+ if (tun_key->tun_flags & TUNNEL_KEY) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+ *ptr = tun_id;
+ }
+
+ return 0;
+}
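+
+/* Worked example for the helper above: with only TUNNEL_KEY set,
+ * gre_calc_hlen() returns 8 (4-byte base header plus 4-byte key field), so
+ * the key is written at offset hdr_len - 4 = 4, immediately after the base
+ * header. The CSUM and SEQ flags never reach this point because they are
+ * rejected earlier in the function.
+ */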
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+
+ /* gre protocol */
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+ /* gre key */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid enc_keyid;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+ MLX5_SET(fte_match_set_misc, misc_c,
+ gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v,
+ gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel gre_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ .match_level = MLX5_MATCH_L3,
+ .can_offload = mlx5e_tc_tun_can_offload_gretap,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap,
+ .parse_udp_ports = NULL,
+ .parse_tunnel = mlx5e_tc_tun_parse_gretap,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
new file mode 100644
index 000000000..c5b1617d5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/bareudp.h>
+#include <net/mpls.h>
+#include "en/tc_tun.h"
+
+static bool can_offload(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2);
+}
+
+static int calc_hlen(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) + MPLS_HLEN;
+}
+
+static int init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &mplsoudp_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ return 0;
+}
+
+static int generate_ip_tun_hdr(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *r)
+{
+ const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ const struct mlx5e_mpls_info *mpls_info = &r->mpls_info;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct mpls_shim_hdr *mpls;
+
+ mpls = (struct mpls_shim_hdr *)(udp + 1);
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos);
+
+ return 0;
+}
+
+static int parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+}
+
+static int parse_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_match_mpls match;
+ void *misc2_c;
+ void *misc2_v;
+
+ if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
+ !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
+ return -EOPNOTSUPP;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return -EOPNOTSUPP;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
+ flow_rule_match_mpls(rule, &match);
+
+ /* Only support matching the first LSE */
+ if (match.mask->used_lses != 1)
+ return -EOPNOTSUPP;
+
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_label,
+ match.mask->ls[0].mpls_label);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_label,
+ match.key->ls[0].mpls_label);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_exp,
+ match.mask->ls[0].mpls_tc);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_exp, match.key->ls[0].mpls_tc);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.mask->ls[0].mpls_bos);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.key->ls[0].mpls_bos);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.mask->ls[0].mpls_ttl);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.key->ls[0].mpls_ttl);
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel mplsoudp_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = can_offload,
+ .calc_hlen = calc_hlen,
+ .init_encap_attr = init_encap_attr,
+ .generate_ip_tun_hdr = generate_ip_tun_hdr,
+ .parse_udp_ports = parse_udp_ports,
+ .parse_tunnel = parse_tunnel,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
new file mode 100644
index 000000000..1f62c702b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e)
+{
+ return VXLAN_HLEN;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* check the UDP destination port validity */
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan,
+ be16_to_cpu(enc_ports.key->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a VXLAN port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a VXLAN port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err = 0;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int dst_port = be16_to_cpu(e->tun_info->key.tp_dst);
+
+ e->tunnel = &vxlan_tunnel;
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vxlan udp dport was not registered with the HW");
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n",
+ dst_port);
+ return -EOPNOTSUPP;
+ }
+
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh;
+
+ if (tun_key->tun_flags & TUNNEL_VXLAN_OPT)
+ return -EOPNOTSUPP;
+ vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(tun_id);
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ /* match on VNI is required */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_vxlan_vni)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on VXLAN VNI is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on VXLAN VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(enc_keyid.key->keyid));
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev)
+{
+ const struct vxlan_dev *vxlan = netdev_priv(mirred_dev);
+ const struct vxlan_rdst *dst = &vxlan->default_dst;
+
+ return dst->remote_ifindex;
+}
+
+struct mlx5e_tc_tunnel vxlan_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_vxlan,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan,
+ .parse_tunnel = mlx5e_tc_tun_parse_vxlan,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+ .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
new file mode 100644
index 000000000..d4239e3b3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "tir.h"
+#include "params.h"
+#include <linux/mlx5/transobj.h>
+
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
+
+/* max() doesn't work inside square brackets. */
+#define MLX5E_TIR_CMD_IN_SZ_DW ( \
+ MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? \
+ MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \
+)
+
+struct mlx5e_tir_builder {
+ u32 in[MLX5E_TIR_CMD_IN_SZ_DW];
+ bool modify;
+};
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify)
+{
+ struct mlx5e_tir_builder *builder;
+
+ builder = kvzalloc(sizeof(*builder), GFP_KERNEL);
+ builder->modify = modify;
+
+ return builder;
+}
+
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder)
+{
+ kvfree(builder);
+}
+
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder)
+{
+ memset(builder->in, 0, sizeof(builder->in));
+}
+
+static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder)
+{
+ if (builder->modify)
+ return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx);
+ return MLX5_ADDR_OF(create_tir_in, builder->in, ctx);
+}
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
+ MLX5_SET(tirc, tirc, inline_rqn, rqn);
+}
+
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, rqtn);
+ MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
+}
+
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+ const unsigned int rough_max_l2_l3_hdr_sz = 256;
+
+ if (builder->modify)
+ MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
+
+ switch (pkt_merge_param->type) {
+ case MLX5E_PACKET_MERGE_LRO:
+ MLX5_SET(tirc, tirc, packet_merge_mask,
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+ (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5e_hfunc_to_hw(u8 hfunc)
+{
+ switch (hfunc) {
+ case ETH_RSS_HASH_TOP:
+ return MLX5_RX_HASH_FN_TOEPLITZ;
+ case ETH_RSS_HASH_XOR:
+ return MLX5_RX_HASH_FN_INVERTED_XOR8;
+ default:
+ return MLX5_RX_HASH_FN_NONE;
+ }
+}
+
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+ bool inner)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+ void *hfso;
+
+ if (builder->modify)
+ MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1);
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc));
+ if (rss_hash->hfunc == ETH_RSS_HASH_TOP) {
+ const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, rss_hash->toeplitz_hash_key, len);
+ }
+
+ if (inner)
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
+ else
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields, rss_tt->rx_hash_fields);
+}
+
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+}
+
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, tls_en, 1);
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST |
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST);
+}
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+ struct mlx5_core_dev *mdev, bool reg)
+{
+ int err;
+
+ tir->mdev = mdev;
+
+ err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn);
+ if (err)
+ return err;
+
+ if (reg) {
+ struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+ mutex_lock(&res->td.list_lock);
+ list_add(&tir->list, &res->td.tirs_list);
+ mutex_unlock(&res->td.list_lock);
+ } else {
+ INIT_LIST_HEAD(&tir->list);
+ }
+
+ return 0;
+}
+
+void mlx5e_tir_destroy(struct mlx5e_tir *tir)
+{
+ struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+	/* Skip the mutex if list_del is a no-op (the TIR wasn't registered in
+	 * the list). list_empty will never return true for an item that is on
+	 * tirs_list, and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee
+	 * consistency of the list->next value.
+	 */
+ if (!list_empty(&tir->list)) {
+ mutex_lock(&res->td.list_lock);
+ list_del(&tir->list);
+ mutex_unlock(&res->td.list_lock);
+ }
+
+ mlx5_core_destroy_tir(tir->mdev, tir->tirn);
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder)
+{
+ return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in);
+}
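+
+/* Illustrative builder usage (a sketch, not upstream code; see
+ * mlx5e_create_trap_direct_rq_tir() in en/trap.c for an in-tree user of the
+ * same pattern). tdn, rqn and mdev stand in for the caller's transport
+ * domain, RQ number and core device:
+ *
+ *	struct mlx5e_tir_builder *builder;
+ *	struct mlx5e_tir tir;
+ *	int err;
+ *
+ *	builder = mlx5e_tir_builder_alloc(false);
+ *	if (!builder)
+ *		return -ENOMEM;
+ *	mlx5e_tir_builder_build_inline(builder, tdn, rqn);
+ *	err = mlx5e_tir_init(&tir, builder, mdev, true);
+ *	mlx5e_tir_builder_free(builder);
+ *	...
+ *	mlx5e_tir_destroy(&tir);
+ */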
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
new file mode 100644
index 000000000..857a84bcd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_TIR_H__
+#define __MLX5_EN_TIR_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_rss_params_hash {
+ u8 hfunc;
+ u8 toeplitz_hash_key[40];
+};
+
+struct mlx5e_rss_params_traffic_type {
+ u8 l3_prot_type;
+ u8 l4_prot_type;
+ u32 rx_hash_fields;
+};
+
+struct mlx5e_tir_builder;
+struct mlx5e_packet_merge_param;
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support);
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param);
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+ bool inner);
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder);
+
+struct mlx5_core_dev;
+
+struct mlx5e_tir {
+ struct mlx5_core_dev *mdev;
+ u32 tirn;
+ struct list_head list;
+};
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+ struct mlx5_core_dev *mdev, bool reg);
+void mlx5e_tir_destroy(struct mlx5e_tir *tir);
+
+static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir)
+{
+ return tir->tirn;
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder);
+
+#endif /* __MLX5_EN_TIR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
new file mode 100644
index 000000000..201ac7dd3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies */
+
+#include <net/page_pool.h>
+#include "en/txrx.h"
+#include "en/params.h"
+#include "en/trap.h"
+
+static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_trap *trap_ctx = container_of(napi, struct mlx5e_trap, napi);
+ struct mlx5e_ch_stats *ch_stats = trap_ctx->stats;
+ struct mlx5e_rq *rq = &trap_ctx->rq;
+ bool busy = false;
+ int work_done = 0;
+
+ rcu_read_lock();
+
+ ch_stats->poll++;
+
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ busy |= work_done == budget;
+ busy |= rq->post_wqes(rq);
+
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ goto out;
+
+ mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
+ return work_done;
+}
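+
+/* NAPI contract note (descriptive): returning the full budget above keeps the
+ * trap queue scheduled for another poll round, while the napi_complete_done()
+ * plus mlx5e_cq_arm() path re-enables completion interrupts once the queue is
+ * drained.
+ */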
+
+static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = t->mdev;
+ struct mlx5e_priv *priv = t->priv;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = t->pdev;
+ rq->netdev = priv->netdev;
+ rq->priv = priv;
+ rq->clock = &mdev->clock;
+ rq->tstamp = &priv->tstamp;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &priv->trap_stats.rq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+ mlx5e_rq_set_trap_handlers(rq, params);
+}
+
+static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t)
+{
+ struct mlx5e_rq_param *rq_param = &t->rq_param;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder trap_moder = {};
+ struct mlx5e_rq *rq = &t->rq;
+ int node;
+ int err;
+
+ node = dev_to_node(mdev->device);
+
+ ccp.node = node;
+ ccp.ch_stats = t->stats;
+ ccp.napi = &t->napi;
+ ccp.ix = 0;
+ err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq);
+ if (err)
+ return err;
+
+ mlx5e_init_trap_rq(t, &t->params, rq);
+ err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq);
+ if (err)
+ goto err_destroy_cq;
+
+ return 0;
+
+err_destroy_cq:
+ mlx5e_close_cq(&rq->cq);
+
+ return err;
+}
+
+static void mlx5e_close_trap_rq(struct mlx5e_rq *rq)
+{
+ mlx5e_close_rq(rq);
+ mlx5e_close_cq(&rq->cq);
+}
+
+static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
+ u32 rqn)
+{
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn);
+ err = mlx5e_tir_init(tir, builder, mdev, true);
+
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
+ int max_mtu, u16 q_counter,
+ struct mlx5e_trap *t)
+{
+ struct mlx5e_params *params = &t->params;
+
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ mlx5e_init_rq_type_params(mdev, params);
+ params->sw_mtu = max_mtu;
+ mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param);
+}
+
+static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
+{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0));
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_trap *t;
+ int err;
+
+ t = kvzalloc_node(sizeof(*t), GFP_KERNEL, cpu_to_node(cpu));
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t);
+
+ t->priv = priv;
+ t->mdev = priv->mdev;
+ t->tstamp = &priv->tstamp;
+ t->pdev = mlx5_core_dma_dev(priv->mdev);
+ t->netdev = priv->netdev;
+ t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ t->stats = &priv->trap_stats.ch;
+
+ netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll);
+
+ err = mlx5e_open_trap_rq(priv, t);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ err = mlx5e_create_trap_direct_rq_tir(t->mdev, &t->tir, t->rq.rqn);
+ if (err)
+ goto err_close_trap_rq;
+
+ return t;
+
+err_close_trap_rq:
+ mlx5e_close_trap_rq(&t->rq);
+err_napi_del:
+ netif_napi_del(&t->napi);
+ kvfree(t);
+ return ERR_PTR(err);
+}
+
+void mlx5e_close_trap(struct mlx5e_trap *trap)
+{
+ mlx5e_tir_destroy(&trap->tir);
+ mlx5e_close_trap_rq(&trap->rq);
+ netif_napi_del(&trap->napi);
+ kvfree(trap);
+}
+
+static void mlx5e_activate_trap(struct mlx5e_trap *trap)
+{
+ napi_enable(&trap->napi);
+ mlx5e_activate_rq(&trap->rq);
+ mlx5e_trigger_napi_sched(&trap->napi);
+}
+
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap = priv->en_trap;
+
+ mlx5e_deactivate_rq(&trap->rq);
+ napi_disable(&trap->napi);
+}
+
+static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap;
+
+ trap = mlx5e_open_trap(priv);
+ if (IS_ERR(trap))
+ goto out;
+
+ mlx5e_activate_trap(trap);
+out:
+ return trap;
+}
+
+static void mlx5e_del_trap_queue(struct mlx5e_priv *priv)
+{
+ mlx5e_deactivate_trap(priv);
+ mlx5e_close_trap(priv->en_trap);
+ priv->en_trap = NULL;
+}
+
+static int mlx5e_trap_get_tirn(struct mlx5e_trap *en_trap)
+{
+ return en_trap->tir.tirn;
+}
+
+static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id)
+{
+ bool open_queue = !priv->en_trap;
+ struct mlx5e_trap *trap;
+ int err;
+
+ if (open_queue) {
+ trap = mlx5e_add_trap_queue(priv);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ priv->en_trap = trap;
+ }
+
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ err = -EINVAL;
+ goto err_out;
+ }
+ return 0;
+
+err_out:
+ if (open_queue)
+ mlx5e_del_trap_queue(priv);
+ return err;
+}
+
+static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id)
+{
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ mlx5e_remove_vlan_trap(priv->fs);
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ mlx5e_remove_mac_trap(priv->fs);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ return -EINVAL;
+ }
+ if (priv->en_trap && !mlx5_devlink_trap_get_num_active(priv->mdev))
+ mlx5e_del_trap_queue(priv);
+
+ return 0;
+}
+
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx)
+{
+ int err = 0;
+
+	/* Traps are unarmed when the interface is down, so there is no need to
+	 * update them. The configuration is saved in the core driver and is
+	 * queried and applied upon the interface up operation in
+	 * mlx5e_open_locked().
+	 */
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ switch (trap_ctx->action) {
+ case DEVLINK_TRAP_ACTION_TRAP:
+ err = mlx5e_handle_action_trap(priv, trap_ctx->id);
+ break;
+ case DEVLINK_TRAP_ACTION_DROP:
+ err = mlx5e_handle_action_drop(priv, trap_ctx->id);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unsupported action %d\n", __func__,
+ trap_ctx->action);
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static int mlx5e_apply_trap(struct mlx5e_priv *priv, int trap_id, bool enable)
+{
+ enum devlink_trap_action action;
+ int err;
+
+ err = mlx5_devlink_traps_get_action(priv->mdev, trap_id, &action);
+ if (err)
+ return err;
+ if (action == DEVLINK_TRAP_ACTION_TRAP)
+ err = enable ? mlx5e_handle_action_trap(priv, trap_id) :
+ mlx5e_handle_action_drop(priv, trap_id);
+ return err;
+}
+
+static const int mlx5e_traps_arr[] = {
+ DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER,
+};
+
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5e_traps_arr); i++) {
+ err = mlx5e_apply_trap(priv, mlx5e_traps_arr[i], enable);
+ if (err)
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
new file mode 100644
index 000000000..aa3f17658
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies */
+
+#ifndef __MLX5E_TRAP_H__
+#define __MLX5E_TRAP_H__
+
+#include "../en.h"
+#include "../devlink.h"
+
+struct mlx5e_trap {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_tir tir;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+
+ /* data path - accessed per napi poll */
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
+
+ struct mlx5e_params params;
+ struct mlx5e_rq_param rq_param;
+};
+
+void mlx5e_close_trap(struct mlx5e_trap *trap);
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv);
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx);
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
new file mode 100644
index 000000000..344245c01
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -0,0 +1,494 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TXRX_H___
+#define __MLX5_EN_TXRX_H___
+
+#include "en.h"
+#include <linux/indirect_call_wrapper.h>
+
+#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+/* IPSEC inline data includes:
+ * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for
+ * next header.
+ * 2. ESP authentication data: 16 bytes for ICV.
+ */
+#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \
+ 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS)
+
+/* 366 should be big enough to cover all L2, L3 and L4 headers with possible
+ * encapsulations.
+ */
+#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \
+ MLX5_SEND_WQE_DS)
+
+/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */
+#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \
+ MLX5E_MAX_TX_INLINE_DS + \
+ MLX5E_MAX_TX_IPSEC_DS + \
+ MAX_SKB_FRAGS + 1, \
+ MLX5_SEND_WQEBB_NUM_DS)
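+
+/* Worked example (sizes assumed for illustration only): with a 32-byte empty
+ * TX WQE (2 DS), MLX5E_MAX_TX_INLINE_DS = DIV_ROUND_UP(366 - 2 + 4, 16) = 23,
+ * MLX5E_MAX_TX_IPSEC_DS = DIV_ROUND_UP(4 + 255 + 1 + 1 + 16, 16) = 18 and
+ * MAX_SKB_FRAGS == 17, the sum is 2 + 23 + 18 + 18 = 61 DS, which rounds up
+ * to 16 WQEBBs of 4 DS each.
+ */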
+
+#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
+
+static inline
+ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts)
+{
+ return INDIRECT_CALL_2(func, mlx5_real_time_cyc2time, mlx5_timecounter_cyc2time,
+ clock, cqe_ts);
+}
+
+enum mlx5e_icosq_wqe_type {
+ MLX5E_ICOSQ_WQE_NOP,
+ MLX5E_ICOSQ_WQE_UMR_RX,
+ MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
+#ifdef CONFIG_MLX5_EN_TLS
+ MLX5E_ICOSQ_WQE_UMR_TLS,
+ MLX5E_ICOSQ_WQE_SET_PSV_TLS,
+ MLX5E_ICOSQ_WQE_GET_PSV_TLS,
+#endif
+};
+
+/* General */
+static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb)
+{
+ return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST;
+}
+
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
+int mlx5e_napi_poll(struct napi_struct *napi, int budget);
+int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
+
+/* RX */
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
+
+/* TX */
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
+
+static inline bool
+mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
+{
+ return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
+}
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+ return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
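+
+/* Worked example for the check above: with a ring of 8 WQEBBs, pc == 10 and
+ * cc == 6, four entries are in flight and mlx5_wq_cyc_ctr2ix(wq, cc - pc)
+ * masks (6 - 10) down to 4 free slots; the extra "cc == pc" test covers the
+ * empty ring, where the masked difference would read as 0.
+ */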
+
+static inline void *mlx5e_fetch_wqe(struct mlx5_wq_cyc *wq, u16 pi, size_t wqe_size)
+{
+ void *wqe;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(wqe, 0, wqe_size);
+
+ return wqe;
+}
+
+#define MLX5E_TX_FETCH_WQE(sq, pi) \
+ ((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_tx_wqe)))
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+ cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ (*pc)++;
+
+ return wqe;
+}
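+
+/* Note on the two NOP helpers above (descriptive, with some interpretation):
+ * the low byte of qpn_ds carries the WQE's data-segment count, so "| 0x01"
+ * declares a single 16-byte control segment; the fenced variant additionally
+ * sets a small initiator fence, ordering the NOP with respect to earlier
+ * WQEs.
+ */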
+
+struct mlx5e_tx_wqe_info {
+ struct sk_buff *skb;
+ u32 num_bytes;
+ u8 num_wqebbs;
+ u8 num_dma;
+ u8 num_fifo_pkts;
+#ifdef CONFIG_MLX5_EN_TLS
+ struct page *resync_dump_frag_page;
+#endif
+};
+
+static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_tx_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid a WQE wrapping across two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
+
+static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
+}
+
+struct mlx5e_shampo_umr {
+ u16 len;
+};
+
+struct mlx5e_icosq_wqe_info {
+ u8 wqe_type;
+ u8 num_wqebbs;
+
+ /* Auxiliary data for different wqe types. */
+ union {
+ struct {
+ struct mlx5e_rq *rq;
+ } umr;
+ struct mlx5e_shampo_umr shampo;
+#ifdef CONFIG_MLX5_EN_TLS
+ struct {
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ } tls_set_params;
+ struct {
+ struct mlx5e_ktls_rx_resync_buf *buf;
+ } tls_get_params;
+#endif
+ };
+};
+
+void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq);
+
+static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_icosq_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid a WQE wrapping across two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_NOP,
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+static inline void
+mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ *wq->db = cpu_to_be32(pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, uar_map);
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+static inline struct mlx5e_sq_dma *
+mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+ return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void
+mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
+ enum mlx5e_dma_map_type map_type)
+{
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+ dma->addr = addr;
+ dma->size = size;
+ dma->type = map_type;
+}
+
+static inline
+struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i)
+{
+ return &fifo->fifo[i & fifo->mask];
+}
+
+static inline
+void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
+{
+ struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
+
+ *skb_item = skb;
+}
+
+static inline
+struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
+{
+ WARN_ON_ONCE(*fifo->pc == *fifo->cc);
+
+ return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
+}
+
+static inline void
+mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
+{
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+ }
+}
+
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
+
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
+{
+ return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+}
+
+static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
+{
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ mlx5_wq_ll_reset(&rq->mpwqe.wq);
+ rq->mpwqe.actual_wq_head = 0;
+ } else {
+ mlx5_wq_cyc_reset(&rq->wqe.wq);
+ }
+}
+
+static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn,
+ struct mlx5_err_cqe *err_cqe)
+{
+ struct mlx5_cqwq *wq = &cq->wq;
+ u32 ci;
+
+ ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+
+ netdev_err(cq->netdev,
+ "Error cqe on cqn 0x%x, ci 0x%x, qn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ cq->mcq.cqn, ci, qn,
+ get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
+ mlx5_dump_err_cqe(cq->mdev, err_cqe);
+}
+
+static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+ }
+}
+
+static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return rq->mpwqe.wq.cur_sz;
+ default:
+ return rq->wqe.wq.cur_sz;
+ }
+}
+
+static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_head(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_head(&rq->wqe.wq);
+ }
+}
+
+static inline u16 mlx5e_rqwq_get_wqe_counter(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_counter(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_counter(&rq->wqe.wq);
+ }
+}
+
+/* SW parser related functions */
+
+struct mlx5e_swp_spec {
+ __be16 l3_proto;
+ u8 l4_proto;
+ u8 is_tun;
+ __be16 tun_l3_proto;
+ u8 tun_l4_proto;
+};
+
+static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg)
+{
+ /* SWP offsets are in 2-byte words */
+ eseg->swp_outer_l3_offset += VLAN_HLEN / 2;
+ eseg->swp_outer_l4_offset += VLAN_HLEN / 2;
+ eseg->swp_inner_l3_offset += VLAN_HLEN / 2;
+ eseg->swp_inner_l4_offset += VLAN_HLEN / 2;
+}
+
+static inline void
+mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
+ struct mlx5e_swp_spec *swp_spec)
+{
+ /* SWP offsets are in 2-byte words */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+ if (swp_spec->l4_proto) {
+ eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
+ if (swp_spec->l4_proto == IPPROTO_UDP)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+ }
+
+ if (swp_spec->is_tun) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
+ eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ }
+ switch (swp_spec->tun_l4_proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ }
+}
+
+#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)
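+/* For example, a WQE of 4 WQEBBs may need up to 3 WQEBBs of NOP padding at
+ * the page edge plus the 4 WQEBBs of the WQE itself: 2 * 4 - 1 = 7 WQEBBs of
+ * stop room.
+ */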
+
+static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size)
+{
+ WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev));
+
+ /* A WQE must not cross the page boundary, hence two conditions:
+ * 1. Its size must not exceed the page size.
+ * 2. If the WQE size is X, and the space remaining in a page is less
+ * than X, this space needs to be padded with NOPs. So, one WQE of
+ * size X may require up to X-1 WQEBBs of padding, which makes the
+ * stop room of X-1 + X.
+ * WQE size is also limited by the hardware limit.
+ */
+ WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev),
+ "wqe_size %u is greater than max SQ WQEBBs %u",
+ wqe_size, mlx5e_get_max_sq_wqebbs(mdev));
+
+ return MLX5E_STOP_ROOM(wqe_size);
+}
+
+static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev)
+{
+ return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev));
+}
+
+static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev)
+{
+ u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+
+ return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs);
+}
+
+static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
+{
+ u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size);
+
+ return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
+}
+
+static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
+{
+ size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+
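+ /* Each entry ends with a flexible alloc_units[] array, so entries are
+ * laid out at byte offsets of i * isz instead of plain array indexing.
+ */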
+ return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
new file mode 100644
index 000000000..20507ef2f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include "en/xdp.h"
+#include "en/params.h"
+
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
+{
+ int hr = mlx5e_get_linear_rq_headroom(params, xsk);
+
+ /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+ * The condition checked in mlx5e_rx_is_linear_skb is:
+ * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+ * (Note that hw_mtu == sw_mtu + hard_mtu.)
+ * What is returned from this function is:
+ * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+ * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+ * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+ * because both PAGE_SIZE and S are already aligned. Any number greater
+ * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+ * so max_mtu is the maximum MTU allowed.
+ */
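+ /* SKB_MAX_HEAD(hr) evaluates to PAGE_SIZE - hr - S, and MLX5E_HW2SW_MTU
+ * subtracts hard_mtu, which together yield formula (2) above.
+ */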
+
+ return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
+static inline bool
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
+ struct page *page, struct xdp_buff *xdp)
+{
+ struct skb_shared_info *sinfo = NULL;
+ struct mlx5e_xmit_data xdptxd;
+ struct mlx5e_xdp_info xdpi;
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ int i;
+
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf))
+ return false;
+
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
+ /* The xdp_buff was in the UMEM and was copied into a newly
+ * allocated page. The UMEM page was returned via the ZCA, and
+ * this new page has to be mapped at this point and has to be
+ * unmapped and returned via xdp_return_frame on completion.
+ */
+
+ /* Prevent double recycling of the UMEM page. Even if this
+ * function returns false, the xdp_buff shouldn't be recycled,
+ * as that was already done in xdp_convert_zc_to_xdp_frame.
+ */
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+
+ dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(sq->pdev, dma_addr)) {
+ xdp_return_frame(xdpf);
+ return false;
+ }
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = dma_addr;
+
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+ return false;
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ return true;
+ }
+
+ /* The driver assumes that xdp_convert_buff_to_frame returns an
+ * xdp_frame that points to the same memory region as the original
+ * xdp_buff. This allows mapping the memory only once and using the
+ * DMA_BIDIRECTIONAL mode.
+ */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ xdpi.page.rq = rq;
+
+ dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+ u32 len;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+ len = skb_frag_size(frag);
+ dma_sync_single_for_device(sq->pdev, addr, len,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+
+ xdptxd.dma_addr = dma_addr;
+
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+ return false;
+
+ xdpi.page.page = page;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ xdpi.page.page = skb_frag_page(frag);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ }
+ }
+
+ return true;
+}
+
+/* returns true if packet was consumed by xdp */
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp)
+{
+ u32 act;
+ int err;
+
+ act = bpf_prog_run_xdp(prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ return false;
+ case XDP_TX:
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp)))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+ return true;
+ case XDP_REDIRECT:
+ /* When XDP is enabled, the page refcount is 1 here */
+ err = xdp_do_redirect(rq->netdev, xdp, prog);
+ if (unlikely(err))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
+ __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
+ if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
+ mlx5e_page_dma_unmap(rq, page);
+ rq->stats->xdp_redirect++;
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+xdp_abort:
+ trace_xdp_exception(rq->netdev, prog, act);
+ fallthrough;
+ case XDP_DROP:
+ rq->stats->xdp_drop++;
+ return true;
+ }
+}
+
+static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_xdp_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = 1,
+ .num_pkts = 0,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nops += contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ net_prefetchw(wqe->data);
+
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
+ };
+
+ stats->mpwqe++;
+}
+
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
+ u16 ds_count = session->ds_count;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
+ wi->num_pkts = session->pkt_count;
+
+ sq->pc += wi->num_wqebbs;
+
+ sq->doorbell_cseg = cseg;
+
+ session->wqe = NULL; /* Close session */
+}
+
+enum {
+ MLX5E_XDP_CHECK_OK = 1,
+ MLX5E_XDP_CHECK_START_MPWQE = 2,
+};
+
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!sq->mpwqe.wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ sq->stop_room))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_START_MPWQE;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result);
+
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ if (unlikely(sinfo)) {
+ /* MPWQE is enabled, but a multi-buffer packet is queued for
+ * transmission. MPWQE can't send fragmented packets, so close
+ * the current session and fall back to a regular WQE.
+ */
+ if (unlikely(sq->mpwqe.wqe))
+ mlx5e_xdp_mpwqe_complete(sq);
+ return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+ }
+
+ if (unlikely(xdptxd->len > sq->hw_mtu)) {
+ stats->err++;
+ return false;
+ }
+
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
+ if (unlikely(check_result < 0))
+ return false;
+
+ if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
+ /* Start the session when nothing can fail, so it's guaranteed
+ * that if there is an active session, it has at least one dseg,
+ * and it's safe to complete it at any time.
+ */
+ mlx5e_xdp_mpwqe_session_start(sq);
+ }
+
+ mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
+
+ if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ stats->xmit++;
+ return true;
+}
+
+static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
+}
+
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5e_tx_wqe *wqe;
+
+ dma_addr_t dma_addr = xdptxd->dma_addr;
+ u32 dma_len = xdptxd->len;
+ u16 ds_cnt, inline_hdr_sz;
+ u8 num_wqebbs = 1;
+ int num_frags = 0;
+ u16 pi;
+
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
+ stats->err++;
+ return false;
+ }
+
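+ /* Base DS count: ctrl + eth segments plus one data segment for the DMA
+ * part; inlining the packet start below consumes one more data segment.
+ */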
+ ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
+ ds_cnt++;
+
+ /* check_result must be 0 if sinfo is passed. */
+ if (!check_result) {
+ int stop_room = 1;
+
+ if (unlikely(sinfo)) {
+ ds_cnt += sinfo->nr_frags;
+ num_frags = sinfo->nr_frags;
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
+ * enough to hold all fragments.
+ */
+ stop_room = MLX5E_STOP_ROOM(num_wqebbs);
+ }
+
+ check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
+ }
+ if (unlikely(check_result < 0))
+ return false;
+
+ pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ net_prefetchw(wqe);
+
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ inline_hdr_sz = 0;
+
+ /* copy the inline part if required */
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
+ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
+ dma_len -= MLX5E_XDP_MIN_INLINE;
+ dma_addr += MLX5E_XDP_MIN_INLINE;
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ dseg++;
+ }
+
+ /* write the dma part */
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+
+ if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
+ u8 num_pkts = 1 + num_frags;
+ int i;
+
+ memset(&cseg->trailer, 0, sizeof(cseg->trailer));
+ memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
+
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ dseg->lkey = sq->mkey_be;
+
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+
+ dseg++;
+ dseg->addr = cpu_to_be64(addr);
+ dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+ dseg->lkey = sq->mkey_be;
+ }
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_pkts = num_pkts,
+ };
+
+ sq->pc += num_wqebbs;
+ } else {
+ cseg->fm_ce_se = 0;
+
+ sq->pc++;
+ }
+
+ sq->doorbell_cseg = cseg;
+
+ stats->xmit++;
+ return true;
+}
+
+static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_wqe_info *wi,
+ u32 *xsk_frames,
+ bool recycle,
+ struct xdp_frame_bulk *bq)
+{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ u16 i;
+
+ for (i = 0; i < wi->num_pkts; i++) {
+ struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ switch (xdpi.mode) {
+ case MLX5E_XDP_XMIT_MODE_FRAME:
+ /* XDP_TX from the XSK RQ and XDP_REDIRECT */
+ dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
+ xdpi.frame.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
+ break;
+ case MLX5E_XDP_XMIT_MODE_PAGE:
+ /* XDP_TX from the regular RQ */
+ mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
+ break;
+ case MLX5E_XDP_XMIT_MODE_XSK:
+ /* AF_XDP send */
+ (*xsk_frames)++;
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ }
+ }
+}
+
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+{
+ struct xdp_frame_bulk bq;
+ struct mlx5e_xdpsq *sq;
+ struct mlx5_cqe64 *cqe;
+ u32 xsk_frames = 0;
+ u16 sqcc;
+ int i;
+
+ xdp_frame_bulk_init(&bq);
+
+ sq = container_of(cq, struct mlx5e_xdpsq, cq);
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return false;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ i = 0;
+ do {
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 wqe_counter, ci;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ do {
+ last_wqe = (sqcc == wqe_counter);
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+
+ sqcc += wi->num_wqebbs;
+
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
+ } while (!last_wqe);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(sq->channel->netdev,
+ "Bad OP in XDPSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+ (struct mlx5_err_cqe *)cqe);
+ mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+ }
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ xdp_flush_frame_bulk(&bq);
+
+ if (xsk_frames)
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
+
+ sq->stats->cqes += i;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->cc = sqcc;
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+{
+ struct xdp_frame_bulk bq;
+ u32 xsk_frames = 0;
+
+ xdp_frame_bulk_init(&bq);
+
+ rcu_read_lock(); /* need for xdp_return_frame_bulk */
+
+ while (sq->cc != sq->pc) {
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ wi = &sq->db.wqe_info[ci];
+
+ sq->cc += wi->num_wqebbs;
+
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
+ }
+
+ xdp_flush_frame_bulk(&bq);
+ rcu_read_unlock();
+
+ if (xsk_frames)
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
+}
+
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_xdpsq *sq;
+ int nxmit = 0;
+ int sq_num;
+ int i;
+
+ /* this flag is sufficient, no need to test internal sq state */
+ if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
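+ /* Pick the per-channel XDP SQ indexed by the current CPU. */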
+ sq_num = smp_processor_id();
+
+ if (unlikely(sq_num >= priv->channels.num))
+ return -ENXIO;
+
+ sq = &priv->channels.c[sq_num]->xdpsq;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct mlx5e_xmit_data xdptxd;
+ struct mlx5e_xdp_info xdpi;
+ bool ret;
+
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+ xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
+ xdptxd.len, DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
+ break;
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = xdptxd.dma_addr;
+
+ ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
+ if (unlikely(!ret)) {
+ dma_unmap_single(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_TO_DEVICE);
+ break;
+ }
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ nxmit++;
+ }
+
+ if (flags & XDP_XMIT_FLUSH) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+ mlx5e_xmit_xdp_doorbell(sq);
+ }
+
+ return nxmit;
+}
+
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
+{
+ struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
+
+ if (xdpsq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(xdpsq);
+
+ mlx5e_xmit_xdp_doorbell(xdpsq);
+
+ if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
+ xdp_do_flush_map();
+ __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
+ }
+}
+
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
+{
+ sq->xmit_xdp_frame_check = is_mpw ?
+ mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
+ sq->xmit_xdp_frame = is_mpw ?
+ mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
new file mode 100644
index 000000000..bc2d9034a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_XDP_H__
+#define __MLX5_EN_XDP_H__
+
+#include <linux/indirect_call_wrapper.h>
+
+#include "en.h"
+#include "en/txrx.h"
+
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
+ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
+ sizeof(struct mlx5_wqe_inline_seg))
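+/* With 16-byte data segments (MLX5_SEND_WQE_DS) and a 4-byte inline segment
+ * header, this allows inlining at most 16 * 16 - 4 = 252 bytes of data.
+ */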
+
+struct mlx5e_xsk_param;
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp);
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo,
+ int check_result));
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo,
+ int check_result));
+INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
+INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
+
+static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+
+ if (priv->channels.params.xdp_prog)
+ set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
+}
+
+static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
+{
+ if (priv->channels.params.xdp_prog)
+ clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
+
+ clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+ /* Let other device's napi(s) and XSK wakeups see our new state. */
+ synchronize_net();
+}
+
+static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
+}
+
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
+{
+ if (sq->doorbell_cseg) {
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ sq->doorbell_cseg = NULL;
+ }
+}
+
+/* Enable inline WQEs to shift some load from a congested HCA (HW) to
+ * a less congested CPU (SW).
+ */
+static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
+{
+ u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
+
+#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
+#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
+
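+ /* Hysteresis: stop inlining once the number of outstanding frames drops
+ * to the low watermark, and start again only when it reaches the high
+ * watermark.
+ */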
+ if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
+ return false;
+
+ if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
+ return true;
+
+ return cur;
+}
+
+static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
+{
+ if (session->inline_on)
+ return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
+ max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+
+ return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
+}
+
+struct mlx5e_xdp_wqe_info {
+ u8 num_wqebbs;
+ u8 num_pkts;
+};
+
+static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct mlx5e_xdpsq_stats *stats)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg =
+ (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+ u32 dma_len = xdptxd->len;
+
+ session->pkt_count++;
+ session->bytes_count += dma_len;
+
+ if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
+ struct mlx5_wqe_inline_seg *inline_dseg =
+ (struct mlx5_wqe_inline_seg *)dseg;
+ u16 ds_len = sizeof(*inline_dseg) + dma_len;
+ u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS);
+
+ inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
+ memcpy(inline_dseg->data, xdptxd->data, dma_len);
+
+ session->ds_count += ds_cnt;
+ stats->inlnw++;
+ return;
+ }
+
+ dseg->addr = cpu_to_be64(xdptxd->dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+ session->ds_count++;
+}
+
+static inline void
+mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
+ struct mlx5e_xdp_info *xi)
+{
+ u32 i = (*fifo->pc)++ & fifo->mask;
+
+ fifo->xi[i] = *xi;
+}
+
+static inline struct mlx5e_xdp_info
+mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
+{
+ return fifo->xi[(*fifo->cc)++ & fifo->mask];
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
new file mode 100644
index 000000000..ebada0c5a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <net/xdp_sock_drv.h>
+#include "pool.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ struct device *dev = mlx5_core_dma_dev(priv->mdev);
+
+ return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
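+/* The pools array is allocated lazily when the first pool is attached and
+ * freed when the last one is detached; refcnt counts attached pools.
+ */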
+static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
+{
+ if (!xsk->pools) {
+ xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->pools), GFP_KERNEL);
+ if (unlikely(!xsk->pools))
+ return -ENOMEM;
+ }
+
+ xsk->refcnt++;
+ xsk->ever_used = true;
+
+ return 0;
+}
+
+static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk)
+{
+ if (!--xsk->refcnt) {
+ kfree(xsk->pools);
+ xsk->pools = NULL;
+ }
+}
+
+static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix)
+{
+ int err;
+
+ err = mlx5e_xsk_get_pools(xsk);
+ if (unlikely(err))
+ return err;
+
+ xsk->pools[ix] = pool;
+ return 0;
+}
+
+static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix)
+{
+ xsk->pools[ix] = NULL;
+
+ mlx5e_xsk_put_pools(xsk);
+}
+
+static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool)
+{
+ return xsk_pool_get_headroom(pool) <= 0xffff &&
+ xsk_pool_get_chunk_size(pool) <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk)
+{
+ xsk->headroom = xsk_pool_get_headroom(pool);
+ xsk->chunk_size = xsk_pool_get_chunk_size(pool);
+ xsk->unaligned = pool->unaligned;
+}
+
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool, u16 ix)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ int err;
+
+ if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix)))
+ return -EBUSY;
+
+ if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
+ return -EINVAL;
+
+ err = mlx5e_xsk_map_pool(priv, pool);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix);
+ if (unlikely(err))
+ goto err_unmap_pool;
+
+ mlx5e_build_xsk_param(pool, &xsk);
+
+ if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
+ const char *recommendation = is_power_of_2(xsk.chunk_size) ?
+ "Upgrade firmware" : "Disable striding RQ";
+
+ mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n",
+ xsk.chunk_size, recommendation);
+ }
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ /* XSK objects will be created on open. */
+ goto validate_closed;
+ }
+
+ if (!params->xdp_prog) {
+ /* XSK objects will be created when an XDP program is set,
+ * and the channels are reopened.
+ */
+ goto validate_closed;
+ }
+
+ c = priv->channels.c[ix];
+
+ err = mlx5e_open_xsk(priv, params, &xsk, pool, c);
+ if (unlikely(err))
+ goto err_remove_pool;
+
+ mlx5e_activate_xsk(c);
+ mlx5e_trigger_napi_icosq(c);
+
+ /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+ * any Fill Ring entries at the setup stage.
+ */
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true);
+
+ mlx5e_deactivate_rq(&c->rq);
+ mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY);
+
+ return 0;
+
+err_remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+
+err_unmap_pool:
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return err;
+
+validate_closed:
+ /* Check the configuration in advance, rather than fail at a later stage
+ * (in mlx5e_xdp_set or on open) and end up with no channels.
+ */
+ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ err = -EINVAL;
+ goto err_remove_pool;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+ struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params,
+ &priv->xsk, ix);
+ struct mlx5e_channel *c;
+
+ if (unlikely(!pool))
+ return -EINVAL;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto remove_pool;
+
+ /* XSK RQ and SQ are only created if XDP program is set. */
+ if (!priv->channels.params.xdp_prog)
+ goto remove_pool;
+
+ c = priv->channels.c[ix];
+
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_icosq(c);
+ mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT);
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);
+
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return 0;
+}
+
+static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool,
+ u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_enable_locked(priv, pool, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_disable_locked(priv, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+
+ if (unlikely(qid >= params->num_channels))
+ return -EINVAL;
+
+ return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) :
+ mlx5e_xsk_disable_pool(priv, qid);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
new file mode 100644
index 000000000..dca0010a0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_XSK_POOL_H__
+#define __MLX5_EN_XSK_POOL_H__
+
+#include "en.h"
+
+static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->pools)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->pools[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid);
+
+#endif /* __MLX5_EN_XSK_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000..c91b54d9f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
+
+/* RX data path */
+
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5_wq_cyc *wq = &icosq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int batch, i;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
+
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
+ goto err;
+
+ BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
+ batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
+ rq->mpwqe.pages_per_wqe);
+
+ /* If batch < pages_per_wqe, either:
+ * 1. Some (or all) descriptors were invalid.
+ * 2. dma_need_sync is true, and it fell back to allocating one frame.
+ * In either case, try to continue allocating frames one by one, until
+ * the first error, which will mean there are no more valid descriptors.
+ */
+ for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
+ wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!wi->alloc_units[batch].xsk))
+ goto err_reuse_batch;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
+
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
+ }
+ } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ }
+ } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
+ u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
+
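+ /* Cover each frame with three KSMs of mapping_size bytes each, plus a
+ * fourth filler KSM that points at the overflow page.
+ */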
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size * 2),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ };
+ }
+ } else {
+ __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
+ rq->xsk_pool->chunk_size);
+ __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
+
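+ /* One KLM covers the whole XSK frame and a second KLM pads the rest of
+ * the page with the overflow page.
+ */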
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ .bcount = frame_size,
+ };
+ umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ .bcount = pad_size,
+ };
+ }
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ wi->consumed_strides = 0;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
+
+ /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
+ offset = ix * rq->mpwqe.mtts_per_wqe;
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
+ offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
+ offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
+
+ icosq->pc += rq->mpwqe.umr_wqebbs;
+
+ icosq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_reuse_batch:
+ while (--batch >= 0)
+ xsk_buff_free(wi->alloc_units[batch].xsk);
+
+err:
+ rq->stats->buff_alloc_err++;
+ return -ENOMEM;
+}
+
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct xdp_buff **buffs;
+ u32 contig, alloc;
+ int i;
+
+ /* mlx5e_init_frags_partition creates a 1:1 mapping between
+ * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
+ * allocate XDP buffers straight into alloc_units.
+ */
+ BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
+ sizeof(rq->wqe.alloc_units[0].xsk));
+ buffs = (struct xdp_buff **)rq->wqe.alloc_units;
+ contig = mlx5_wq_cyc_get_size(wq) - ix;
+ if (wqe_bulk <= contig) {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
+ } else {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
+ if (likely(alloc == contig))
+ alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
+ }
+
+ for (i = 0; i < alloc; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return alloc;
+}
+
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!frag->au->xsk))
+ return i;
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return wqe_bulk;
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
+{
+ u32 totallen = xdp->data_end - xdp->data_meta;
+ u32 metalen = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi, totallen);
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_put_data(skb, xdp->data_meta, totallen);
+
+ if (metalen) {
+ skb_metadata_set(skb, metalen);
+ __skb_pull(skb, metalen);
+ }
+
+ return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx)
+{
+ struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk;
+ struct bpf_prog *prog;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ /* head_offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, in
+ * the current implementation, UMR pages are mapped to XSK frames, so
+ * head_offset should always be 0.
+ */
+ WARN_ON_ONCE(head_offset);
+
+ xsk_buff_set_size(xdp, cqe_bcnt);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
+
+ /* Possible flows:
+ * - XDP_REDIRECT to XSKMAP:
+ * The page is owned by the userspace from now.
+ * - XDP_TX and other XDP_REDIRECTs:
+ * The page was returned by ZCA and recycled.
+ * - XDP_DROP:
+ * Recycle the page.
+ * - XDP_PASS:
+ * Allocate an SKB, copy the data and recycle the page.
+ *
+ * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
+ * size is the same as the Driver RX Ring's size, and pages for WQEs are
+ * allocated first from the Reuse Ring, so it has enough space.
+ */
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
+ if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
+ * frame. On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, xdp);
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
+{
+ struct xdp_buff *xdp = wi->au->xsk;
+ struct bpf_prog *prog;
+
+ /* wi->offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, the
+ * XSK allocator allocates frames per packet, instead of pages, so
+ * wi->offset should always be 0.
+ */
+ WARN_ON_ONCE(wi->offset);
+
+ xsk_buff_set_size(xdp, cqe_bcnt);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
+ return NULL; /* page/packet was consumed by XDP */
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+ * will be handled by mlx5e_free_rx_wqe.
+ * On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, xdp);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
new file mode 100644
index 000000000..087c943bd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_RX_H__
+#define __MLX5_EN_XSK_RX_H__
+
+#include "en.h"
+
+/* RX data path */
+
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
+
+#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
new file mode 100644
index 000000000..ff03c4383
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "setup.h"
+#include "en/params.h"
+#include "en/txrx.h"
+#include "en/health.h"
+#include <net/xdp_sock_drv.h>
+
+/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal
+ * stride size of striding RQ.
+ */
+#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE)
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev)
+{
+ /* AF_XDP doesn't support frames larger than PAGE_SIZE. */
+ if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
+ return false;
+
+ /* frag_sz is different for regular and XSK RQs, so ensure that linear
+ * SKB mode is possible.
+ */
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk);
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ return mlx5e_rx_is_linear_skb(mdev, params, xsk);
+ }
+}
+
+static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam)
+{
+ mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
+ mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq);
+}
+
+static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct xsk_buff_pool *pool,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ int rq_xdp_ix;
+ int err;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = c->netdev;
+ rq->priv = c->priv;
+ rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
+ rq->icosq = &c->icosq;
+ rq->ix = c->ix;
+ rq->channel = c;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->xsk_pool = pool;
+ rq->stats = &c->priv->channel_stats[c->ix]->xskrq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ rq_xdp_ix = c->ix;
+ err = mlx5e_rq_set_handlers(rq, params, xsk);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
+}
+
+static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
+ struct mlx5e_xsk_param *xsk)
+{
+ int err;
+
+ err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq);
+ if (err)
+ return err;
+
+ return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq);
+}
+
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
+ struct mlx5e_channel *c)
+{
+ struct mlx5e_channel_param *cparam;
+ struct mlx5e_create_cq_param ccp;
+ int err;
+
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
+ return -EINVAL;
+
+ cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
+ if (!cparam)
+ return -ENOMEM;
+
+ mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam);
+
+ err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
+ &c->xskrq.cq);
+ if (unlikely(err))
+ goto err_free_cparam;
+
+ err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk);
+ if (unlikely(err))
+ goto err_close_rx_cq;
+
+ err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
+ &c->xsksq.cq);
+ if (unlikely(err))
+ goto err_close_rq;
+
+ /* Create a separate SQ, so that when the buff pool is disabled, we can
+ * close this SQ safely and stop receiving CQEs. Otherwise, e.g. if the
+ * XDPSQ were used instead, we might run into trouble when the buff pool
+ * is disabled and then re-enabled while the SQ keeps receiving CQEs
+ * from the old buff pool.
+ */
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true);
+ if (unlikely(err))
+ goto err_close_tx_cq;
+
+ kvfree(cparam);
+
+ set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ return 0;
+
+err_close_tx_cq:
+ mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->xskrq.cq);
+
+err_free_cparam:
+ kvfree(cparam);
+
+ return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+ clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ synchronize_net(); /* Sync with NAPI. */
+
+ mlx5e_close_rq(&c->xskrq);
+ mlx5e_close_cq(&c->xskrq.cq);
+ mlx5e_close_xdpsq(&c->xsksq);
+ mlx5e_close_cq(&c->xsksq.cq);
+
+ memset(&c->xskrq, 0, sizeof(c->xskrq));
+ memset(&c->xsksq, 0, sizeof(c->xsksq));
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
+ * activating XSKRQ in the middle of recovery.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+
+ /* TX queue is created active. */
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
+ * middle of recovery. Suspend the recovery to avoid it.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+
+ /* TX queue is disabled on close. */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
new file mode 100644
index 000000000..50e111b85
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_SETUP_H__
+#define __MLX5_EN_XSK_SETUP_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param;
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev);
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
+ struct mlx5e_channel *c);
+void mlx5e_close_xsk(struct mlx5e_channel *c);
+void mlx5e_activate_xsk(struct mlx5e_channel *c);
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
+
+#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
new file mode 100644
index 000000000..367a9505c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "tx.h"
+#include "pool.h"
+#include "en/xdp.h"
+#include "en/params.h"
+#include <net/xdp_sock_drv.h>
+
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_channel *c;
+
+ if (unlikely(!mlx5e_xdp_is_active(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(qid >= params->num_channels))
+ return -EINVAL;
+
+ c = priv->channels.c[qid];
+
+ if (!napi_if_scheduled_mark_missed(&c->napi)) {
+ /* To avoid WQE overrun, don't post a NOP if async_icosq is not
+ * active and not polled by NAPI. Return 0, because the upcoming
+ * activate will trigger the IRQ for us.
+ */
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->async_icosq.state)))
+ return 0;
+
+ if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
+ return 0;
+
+ mlx5e_trigger_napi_icosq(c);
+ }
+
+ return 0;
+}
+
+/* When TX fails (because of the size of the packet), we need to get completions
+ * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish
+ * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the
+ * same.
+ */
+static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+ struct mlx5e_tx_wqe *nopwqe;
+
+ wi->num_wqebbs = 1;
+ wi->num_pkts = 1;
+
+ nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ sq->doorbell_cseg = &nopwqe->ctrl;
+}
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
+{
+ struct xsk_buff_pool *pool = sq->xsk_pool;
+ struct mlx5e_xmit_data xdptxd;
+ struct mlx5e_xdp_info xdpi;
+ bool work_done = true;
+ bool flush = false;
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
+
+ for (; budget; budget--) {
+ int check_result = INDIRECT_CALL_2(sq->xmit_xdp_frame_check,
+ mlx5e_xmit_xdp_frame_check_mpwqe,
+ mlx5e_xmit_xdp_frame_check,
+ sq);
+ struct xdp_desc desc;
+ bool ret;
+
+ if (unlikely(check_result < 0)) {
+ work_done = false;
+ break;
+ }
+
+ if (!xsk_tx_peek_desc(pool, &desc)) {
+ /* TX will get stuck until something wakes it up by
+ * triggering NAPI. Currently it's expected that the
+ * application calls sendto() if there are consumed but
+ * not yet completed frames.
+ */
+ break;
+ }
+
+ xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr);
+ xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr);
+ xdptxd.len = desc.len;
+
+ xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
+
+ ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+ check_result);
+ if (unlikely(!ret)) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xsk_tx_post_err(sq, &xdpi);
+ } else {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ }
+
+ flush = true;
+ }
+
+ if (flush) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+ mlx5e_xmit_xdp_doorbell(sq);
+
+ xsk_tx_release(pool);
+ }
+
+ return !(budget && work_done);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
new file mode 100644
index 000000000..9c505158b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_TX_H__
+#define __MLX5_EN_XSK_TX_H__
+
+#include "en.h"
+
+/* TX data path */
+
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
+
+#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
new file mode 100644
index 000000000..07187028f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_EN_ACCEL_H__
+#define __MLX5E_EN_ACCEL_H__
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
+#include <en_accel/macsec.h>
+#include "en.h"
+#include "en/txrx.h"
+
+#if IS_ENABLED(CONFIG_GENEVE)
+#include <net/geneve.h>
+
+static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
+{
+ return mlx5_tx_swp_supported(mdev);
+}
+
+static inline void
+mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs)
+{
+ struct mlx5e_swp_spec swp_spec = {};
+ unsigned int offset = 0;
+ __be16 l3_proto;
+ u8 l4_proto;
+
+ l3_proto = vlan_get_protocol(skb);
+ switch (l3_proto) {
+ case htons(ETH_P_IP):
+ l4_proto = ip_hdr(skb)->protocol;
+ break;
+ case htons(ETH_P_IPV6):
+ l4_proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
+ break;
+ default:
+ return;
+ }
+
+ if (l4_proto != IPPROTO_UDP ||
+ udp_hdr(skb)->dest != cpu_to_be16(GENEVE_UDP_PORT))
+ return;
+ swp_spec.l3_proto = l3_proto;
+ swp_spec.l4_proto = l4_proto;
+ swp_spec.is_tun = true;
+ if (inner_ip_hdr(skb)->version == 6) {
+ swp_spec.tun_l3_proto = htons(ETH_P_IPV6);
+ swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr;
+ } else {
+ swp_spec.tun_l3_proto = htons(ETH_P_IP);
+ swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol;
+ }
+
+ mlx5e_set_eseg_swp(skb, eseg, &swp_spec);
+ if (skb_vlan_tag_present(skb) && ihs)
+ mlx5e_eseg_swp_offsets_add_vlan(eseg);
+}
+
+#else
+static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
+{
+ return false;
+}
+
+#endif /* CONFIG_GENEVE */
+
+static inline void
+mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
+{
+ int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
+
+ udp_hdr(skb)->len = htons(payload_len);
+}
+
+struct mlx5e_accel_tx_state {
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_accel_tx_tls_state tls;
+#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_accel_tx_ipsec_state ipsec;
+#endif
+};
+
+static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *state)
+{
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ mlx5e_udp_gso_handle_tx_skb(skb);
+
+#ifdef CONFIG_MLX5_EN_TLS
+ /* May send SKBs and WQEs. */
+ if (mlx5e_ktls_skb_offloaded(skb))
+ if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb,
+ &state->tls)))
+ return false;
+#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) {
+ if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec)))
+ return false;
+ }
+#endif
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+ if (unlikely(mlx5e_macsec_skb_is_offload(skb))) {
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (unlikely(!mlx5e_macsec_handle_tx_skb(priv->macsec, skb)))
+ return false;
+ }
+#endif
+
+ return true;
+}
+
+static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
+ struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state))
+ return mlx5e_ipsec_tx_ids_len(&state->ipsec);
+#endif
+
+ return 0;
+}
+
+/* Part of the eseg touched by TX offloads */
+#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
+
+static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, u16 ihs)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (xfrm_offload(skb))
+ mlx5e_ipsec_tx_build_eseg(priv, skb, eseg);
+#endif
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+ if (unlikely(mlx5e_macsec_skb_is_offload(skb)))
+ mlx5e_macsec_tx_build_eseg(priv->macsec, skb, eseg);
+#endif
+
+#if IS_ENABLED(CONFIG_GENEVE)
+ if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
+ mlx5e_tx_tunnel_accel(skb, eseg, ihs);
+#endif
+}
+
+static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_state *state,
+ struct mlx5_wqe_inline_seg *inlseg)
+{
+#ifdef CONFIG_MLX5_EN_TLS
+ mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls);
+#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) &&
+ state->ipsec.xo && state->ipsec.tailen)
+ mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg);
+#endif
+}
+
+static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
+{
+ return mlx5e_ktls_init_rx(priv);
+}
+
+static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv)
+{
+ mlx5e_ktls_cleanup_rx(priv);
+}
+
+static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv)
+{
+ return mlx5e_ktls_init_tx(priv);
+}
+
+static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv)
+{
+ mlx5e_ktls_cleanup_tx(priv);
+}
+#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
new file mode 100644
index 000000000..06c474049
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <mlx5_core.h>
+#include "en_accel/fs_tcp.h"
+#include "fs_core.h"
+
+enum accel_fs_tcp_type {
+ ACCEL_FS_IPV4_TCP,
+ ACCEL_FS_IPV6_TCP,
+ ACCEL_FS_TCP_NUM_TYPES,
+};
+
+struct mlx5e_accel_fs_tcp {
+ struct mlx5e_flow_table tables[ACCEL_FS_TCP_NUM_TYPES];
+ struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
+};
+
+static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+{
+ switch (i) {
+ case ACCEL_FS_IPV4_TCP:
+ return MLX5_TT_IPV4_TCP;
+ default: /* ACCEL_FS_IPV6_TCP */
+ return MLX5_TT_IPV6_TCP;
+ }
+}
+
+static void accel_fs_tcp_set_ipv4_flow(struct mlx5_flow_spec *spec, struct sock *sk)
+{
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &inet_sk(sk)->inet_daddr, 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &inet_sk(sk)->inet_rcv_saddr, 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock *sk)
+{
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &sk->sk_v6_daddr, 16);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &inet6_sk(sk)->saddr, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+}
+#endif
+
+void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
+ struct sock *sk, u32 tirn,
+ uint32_t flow_tag)
+{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ switch (sk->sk_family) {
+ case AF_INET:
+ accel_fs_tcp_set_ipv4_flow(spec, sk);
+ ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP];
+ fs_dbg(fs, "%s flow is %pI4:%d -> %pI4:%d\n", __func__,
+ &inet_sk(sk)->inet_rcv_saddr,
+ inet_sk(sk)->inet_sport,
+ &inet_sk(sk)->inet_daddr,
+ inet_sk(sk)->inet_dport);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (!ipv6_only_sock(sk) &&
+ ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
+ accel_fs_tcp_set_ipv4_flow(spec, sk);
+ ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP];
+ } else {
+ accel_fs_tcp_set_ipv6_flow(spec, sk);
+ ft = &fs_tcp->tables[ACCEL_FS_IPV6_TCP];
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
+ if (!ft) {
+ flow = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
+ ntohs(inet_sk(sk)->inet_sport));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
+ ntohs(inet_sk(sk)->inet_dport));
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tirn;
+ if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG) {
+ spec->flow_context.flow_tag = flow_tag;
+ spec->flow_context.flags = FLOW_CONTEXT_HAS_TAG;
+ }
+
+ flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1);
+
+ if (IS_ERR(flow))
+ fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow));
+
+out:
+ kvfree(spec);
+ return flow;
+}
+
+static int accel_fs_tcp_add_default_rule(struct mlx5e_flow_steering *fs,
+ enum accel_fs_tcp_type type)
+{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_flow_table *accel_fs_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err = 0;
+
+ accel_fs_t = &fs_tcp->tables[type];
+
+ dest = mlx5_ttc_get_default_dest(ttc, fs_accel2tt(type));
+ rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, accel_fs type=%d, err %d\n",
+ __func__, type, err);
+ return err;
+ }
+
+ fs_tcp->default_rules[type] = rule;
+ return 0;
+}
+
+#define MLX5E_ACCEL_FS_TCP_NUM_GROUPS (2)
+#define MLX5E_ACCEL_FS_TCP_GROUP1_SIZE (BIT(16) - 1)
+#define MLX5E_ACCEL_FS_TCP_GROUP2_SIZE (BIT(0))
+#define MLX5E_ACCEL_FS_TCP_TABLE_SIZE (MLX5E_ACCEL_FS_TCP_GROUP1_SIZE +\
+ MLX5E_ACCEL_FS_TCP_GROUP2_SIZE)
+static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
+ enum accel_fs_tcp_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ ft->g = NULL;
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
+
+ switch (type) {
+ case ACCEL_FS_IPV4_TCP:
+ case ACCEL_FS_IPV6_TCP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ switch (type) {
+ case ACCEL_FS_IPV4_TCP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ break;
+ case ACCEL_FS_IPV6_TCP:
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ACCEL_FS_TCP_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ACCEL_FS_TCP_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+out:
+ kvfree(in);
+
+ return err;
+}
+
+static int accel_fs_tcp_create_table(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_flow_table *ft = &accel_tcp->tables[type];
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_ACCEL_FS_TCP_TABLE_SIZE;
+ ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ fs_dbg(fs, "Created fs accel table id %u level %u\n",
+ ft->t->id, ft->t->level);
+
+ err = accel_fs_tcp_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = accel_fs_tcp_add_default_rule(fs, type);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static int accel_fs_tcp_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err, i;
+
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_accel2tt(i));
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int accel_fs_tcp_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5_flow_destination dest = {};
+ int err, i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+ dest.ft = accel_tcp->tables[i].t;
+
+		/* Modify ttc rules destination to point to the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, fs_accel2tt(i), &dest);
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void accel_fs_tcp_destroy_table(struct mlx5e_flow_steering *fs, int i)
+{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+
+ if (IS_ERR_OR_NULL(fs_tcp->tables[i].t))
+ return;
+
+ mlx5_del_flow_rules(fs_tcp->default_rules[i]);
+ mlx5e_destroy_flow_table(&fs_tcp->tables[i]);
+ fs_tcp->tables[i].t = NULL;
+}
+
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ int i;
+
+ if (!accel_tcp)
+ return;
+
+ accel_fs_tcp_disable(fs);
+
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++)
+ accel_fs_tcp_destroy_table(fs, i);
+
+ kvfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
+}
+
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_accel_fs_tcp *accel_tcp;
+ int i, err;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version))
+ return -EOPNOTSUPP;
+
+ accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL);
+ if (!accel_tcp)
+ return -ENOMEM;
+ mlx5e_fs_set_accel_tcp(fs, accel_tcp);
+
+ for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+ err = accel_fs_tcp_create_table(fs, i);
+ if (err)
+ goto err_destroy_tables;
+ }
+
+ err = accel_fs_tcp_enable(fs);
+ if (err)
+ goto err_destroy_tables;
+
+ return 0;
+
+err_destroy_tables:
+ while (--i >= 0)
+ accel_fs_tcp_destroy_table(fs, i);
+ kvfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
new file mode 100644
index 000000000..a032bff48
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_ACCEL_FS_TCP_H__
+#define __MLX5E_ACCEL_FS_TCP_H__
+
+#include "en/fs.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs);
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
+ struct sock *sk, u32 tirn,
+ uint32_t flow_tag);
+void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule);
+#else
+static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; }
+static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {}
+static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
+ struct sock *sk, u32 tirn,
+ uint32_t flow_tag)
+{ return ERR_PTR(-EOPNOTSUPP); }
+static inline void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) {}
+#endif
+
+#endif /* __MLX5E_ACCEL_FS_TCP_H__ */
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
new file mode 100644
index 000000000..a71560186
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/aead.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+
+#include "en.h"
+#include "ipsec.h"
+#include "ipsec_rxtx.h"
+
+static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
+{
+ return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+}
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
+ unsigned int handle)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_state *ret = NULL;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle)
+ if (sa_entry->handle == handle) {
+ ret = sa_entry->x;
+ xfrm_state_hold(ret);
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ unsigned int handle = sa_entry->ipsec_obj_id;
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ struct mlx5e_ipsec_sa_entry *_sa_entry;
+ unsigned long flags;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ipsec->sadb_rx, _sa_entry, hlist, handle)
+ if (_sa_entry->handle == handle) {
+ rcu_read_unlock();
+ return -EEXIST;
+ }
+ rcu_read_unlock();
+
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ sa_entry->handle = handle;
+ hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+
+ return 0;
+}
+
+static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ hash_del_rcu(&sa_entry->hlist);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_replay_state_esn *replay_esn;
+ u32 seq_bottom = 0;
+ u8 overlap;
+
+ if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+ sa_entry->esn_state.trigger = 0;
+ return false;
+ }
+
+ replay_esn = sa_entry->x->replay_esn;
+ if (replay_esn->seq >= replay_esn->replay_window)
+ seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+
+ overlap = sa_entry->esn_state.overlap;
+
+ sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+ htonl(seq_bottom));
+
+ sa_entry->esn_state.trigger = 1;
+ if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+ sa_entry->esn_state.overlap = 0;
+ return true;
+ } else if (unlikely(!overlap &&
+ (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
+ sa_entry->esn_state.overlap = 1;
+ return true;
+ }
+
+ return false;
+}
+
+static void
+mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct xfrm_state *x = sa_entry->x;
+ struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
+ struct aead_geniv_ctx *geniv_ctx;
+ struct crypto_aead *aead;
+ unsigned int crypto_data_len, key_len;
+ int ivsize;
+
+ memset(attrs, 0, sizeof(*attrs));
+
+ /* key */
+ crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+ key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+
+ memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
+ aes_gcm->key_len = key_len * 8;
+
+ /* salt and seq_iv */
+ aead = x->data;
+ geniv_ctx = crypto_aead_ctx(aead);
+ ivsize = crypto_aead_ivsize(aead);
+ memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
+ memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
+ sizeof(aes_gcm->salt));
+
+	/* icv len */
+ aes_gcm->icv_len = x->aead->alg_icv_len;
+
+ /* esn */
+ if (sa_entry->esn_state.trigger) {
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ attrs->esn = sa_entry->esn_state.esn;
+ if (sa_entry->esn_state.overlap)
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ }
+
+ /* action */
+ attrs->action = (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) ?
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+ MLX5_ACCEL_ESP_ACTION_DECRYPT;
+ /* flags */
+ attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL;
+
+ /* spi */
+ attrs->spi = be32_to_cpu(x->id.spi);
+
+	/* source and destination IPs */
+ memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr));
+ memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr));
+ attrs->is_ipv6 = (x->props.family != AF_INET);
+}
+
+static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
+{
+ struct net_device *netdev = x->xso.real_dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+
+ if (x->props.aalgo != SADB_AALG_NONE) {
+ netdev_info(netdev, "Cannot offload authenticated xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+ netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->props.calgo != SADB_X_CALG_NONE) {
+ netdev_info(netdev, "Cannot offload compressed xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_ESN)) {
+ netdev_info(netdev, "Cannot offload ESN xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.family != AF_INET &&
+ x->props.family != AF_INET6) {
+ netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_TRANSPORT &&
+ x->props.mode != XFRM_MODE_TUNNEL) {
+ dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->id.proto != IPPROTO_ESP) {
+ netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->encap) {
+ netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n");
+ return -EINVAL;
+ }
+ if (!x->aead) {
+ netdev_info(netdev, "Cannot offload xfrm states without aead\n");
+ return -EINVAL;
+ }
+ if (x->aead->alg_icv_len != 128) {
+ netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+ return -EINVAL;
+ }
+ if ((x->aead->alg_key_len != 128 + 32) &&
+ (x->aead->alg_key_len != 256 + 32)) {
+ netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EINVAL;
+ }
+ if (x->tfcpad) {
+ netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n");
+ return -EINVAL;
+ }
+ if (!x->geniv) {
+ netdev_info(netdev, "Cannot offload xfrm states without geniv\n");
+ return -EINVAL;
+ }
+ if (strcmp(x->geniv, "seqiv")) {
+ netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void _update_xfrm_state(struct work_struct *work)
+{
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = container_of(
+ modify_work, struct mlx5e_ipsec_sa_entry, modify_work);
+
+ mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs);
+}
+
+static int mlx5e_xfrm_add_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
+ struct net_device *netdev = x->xso.real_dev;
+ struct mlx5e_priv *priv;
+ int err;
+
+ priv = netdev_priv(netdev);
+ if (!priv->ipsec)
+ return -EOPNOTSUPP;
+
+ err = mlx5e_xfrm_validate_state(x);
+ if (err)
+ return err;
+
+ sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ if (!sa_entry) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sa_entry->x = x;
+ sa_entry->ipsec = priv->ipsec;
+
+ /* check esn */
+ mlx5e_ipsec_update_esn_state(sa_entry);
+
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
+ /* create hw context */
+ err = mlx5_ipsec_create_sa_ctx(sa_entry);
+ if (err)
+ goto err_xfrm;
+
+ err = mlx5e_accel_ipsec_fs_add_rule(priv, sa_entry);
+ if (err)
+ goto err_hw_ctx;
+
+ if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+ err = mlx5e_ipsec_sadb_rx_add(sa_entry);
+ if (err)
+ goto err_add_rule;
+ } else {
+ sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+ mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
+ }
+
+ INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state);
+ x->xso.offload_handle = (unsigned long)sa_entry;
+ goto out;
+
+err_add_rule:
+ mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
+err_hw_ctx:
+ mlx5_ipsec_free_sa_ctx(sa_entry);
+err_xfrm:
+ kfree(sa_entry);
+out:
+ return err;
+}
+
+static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+
+ if (x->xso.dir == XFRM_DEV_OFFLOAD_IN)
+ mlx5e_ipsec_sadb_rx_del(sa_entry);
+}
+
+static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5e_priv *priv = netdev_priv(x->xso.dev);
+
+ cancel_work_sync(&sa_entry->modify_work.work);
+ mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
+ mlx5_ipsec_free_sa_ctx(sa_entry);
+ kfree(sa_entry);
+}
+
+int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec;
+ int ret;
+
+ if (!mlx5_ipsec_device_caps(priv->mdev)) {
+ netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
+ return 0;
+ }
+
+ ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+ if (!ipsec)
+ return -ENOMEM;
+
+ hash_init(ipsec->sadb_rx);
+ spin_lock_init(&ipsec->sadb_rx_lock);
+ ipsec->mdev = priv->mdev;
+ ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
+ priv->netdev->name);
+ if (!ipsec->wq) {
+ ret = -ENOMEM;
+ goto err_wq;
+ }
+
+ ret = mlx5e_accel_ipsec_fs_init(ipsec);
+ if (ret)
+ goto err_fs_init;
+
+ priv->ipsec = ipsec;
+ netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
+ return 0;
+
+err_fs_init:
+ destroy_workqueue(ipsec->wq);
+err_wq:
+ kfree(ipsec);
+ return (ret != -EOPNOTSUPP) ? ret : 0;
+}
+
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+ if (!ipsec)
+ return;
+
+ mlx5e_accel_ipsec_fs_cleanup(ipsec);
+ destroy_workqueue(ipsec->wq);
+ kfree(ipsec);
+ priv->ipsec = NULL;
+}
+
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+ if (x->props.family == AF_INET) {
+ /* Offload with IPv4 options is not supported yet */
+ if (ip_hdr(skb)->ihl > 5)
+ return false;
+ } else {
+		/* Offload with IPv6 extension headers is not supported yet */
+ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+ return false;
+ }
+
+ return true;
+}
+
+static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ &sa_entry->modify_work;
+ bool need_update;
+
+ need_update = mlx5e_ipsec_update_esn_state(sa_entry);
+ if (!need_update)
+ return;
+
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
+ queue_work(sa_entry->ipsec->wq, &modify_work->work);
+}
+
+static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+ .xdo_dev_state_add = mlx5e_xfrm_add_state,
+ .xdo_dev_state_delete = mlx5e_xfrm_del_state,
+ .xdo_dev_state_free = mlx5e_xfrm_free_state,
+ .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
+};
+
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *netdev = priv->netdev;
+
+ if (!mlx5_ipsec_device_caps(mdev))
+ return;
+
+ mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+ netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+ netdev->features |= NETIF_F_HW_ESP;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+ if (!MLX5_CAP_ETH(mdev, swp_csum)) {
+ mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
+ return;
+ }
+
+ netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+ if (!MLX5_CAP_ETH(mdev, swp_lso)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+ return;
+ }
+
+ netdev->gso_partial_features |= NETIF_F_GSO_ESP;
+ mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+ netdev->features |= NETIF_F_GSO_ESP;
+ netdev->hw_features |= NETIF_F_GSO_ESP;
+ netdev->hw_enc_features |= NETIF_F_GSO_ESP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
new file mode 100644
index 000000000..785f18814
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_H__
+#define __MLX5E_IPSEC_H__
+
+
+#include <linux/mlx5/device.h>
+#include <net/xfrm.h>
+#include <linux/idr.h>
+
+#define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+
+enum mlx5_accel_esp_flags {
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0,
+ MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1,
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2,
+};
+
+enum mlx5_accel_esp_action {
+ MLX5_ACCEL_ESP_ACTION_DECRYPT,
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT,
+};
+
+struct aes_gcm_keymat {
+ u64 seq_iv;
+
+ u32 salt;
+ u32 icv_len;
+
+ u32 key_len;
+ u32 aes_key[256 / 32];
+};
+
+struct mlx5_accel_esp_xfrm_attrs {
+ enum mlx5_accel_esp_action action;
+ u32 esn;
+ u32 spi;
+ u32 flags;
+ struct aes_gcm_keymat aes_gcm;
+
+ union {
+ __be32 a4;
+ __be32 a6[4];
+ } saddr;
+
+ union {
+ __be32 a4;
+ __be32 a6[4];
+ } daddr;
+
+ u8 is_ipv6;
+};
+
+enum mlx5_ipsec_cap {
+ MLX5_IPSEC_CAP_CRYPTO = 1 << 0,
+ MLX5_IPSEC_CAP_ESN = 1 << 1,
+};
+
+struct mlx5e_priv;
+
+struct mlx5e_ipsec_sw_stats {
+ atomic64_t ipsec_rx_drop_sp_alloc;
+ atomic64_t ipsec_rx_drop_sadb_miss;
+ atomic64_t ipsec_rx_drop_syndrome;
+ atomic64_t ipsec_tx_drop_bundle;
+ atomic64_t ipsec_tx_drop_no_state;
+ atomic64_t ipsec_tx_drop_not_ip;
+ atomic64_t ipsec_tx_drop_trailer;
+};
+
+struct mlx5e_accel_fs_esp;
+struct mlx5e_ipsec_tx;
+
+struct mlx5e_ipsec {
+ struct mlx5_core_dev *mdev;
+ DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+ spinlock_t sadb_rx_lock; /* Protects sadb_rx */
+ struct mlx5e_ipsec_sw_stats sw_stats;
+ struct workqueue_struct *wq;
+ struct mlx5e_accel_fs_esp *rx_fs;
+ struct mlx5e_ipsec_tx *tx_fs;
+};
+
+struct mlx5e_ipsec_esn_state {
+ u32 esn;
+ u8 trigger: 1;
+ u8 overlap: 1;
+};
+
+struct mlx5e_ipsec_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_modify_hdr *set_modify_hdr;
+};
+
+struct mlx5e_ipsec_modify_state_work {
+ struct work_struct work;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+};
+
+struct mlx5e_ipsec_sa_entry {
+ struct hlist_node hlist; /* Item in SADB_RX hashtable */
+ struct mlx5e_ipsec_esn_state esn_state;
+ unsigned int handle; /* Handle in SADB_RX */
+ struct xfrm_state *x;
+ struct mlx5e_ipsec *ipsec;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+ void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+ u32 ipsec_obj_id;
+ u32 enc_key_id;
+ struct mlx5e_ipsec_rule ipsec_rule;
+ struct mlx5e_ipsec_modify_state_work modify_work;
+};
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+int mlx5e_ipsec_init(struct mlx5e_priv *priv);
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
+ unsigned int handle);
+
+void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec);
+int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec);
+int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry);
+void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry);
+
+int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
+void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
+
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+static inline struct mlx5_core_dev *
+mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ return sa_entry->ipsec->mdev;
+}
+#else
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+}
+
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+#endif
+
+#endif /* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
new file mode 100644
index 000000000..b859e4a4c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include "en.h"
+#include "en/fs.h"
+#include "ipsec.h"
+#include "fs_core.h"
+
+#define NUM_IPSEC_FTE BIT(15)
+
+enum accel_fs_esp_type {
+ ACCEL_FS_ESP4,
+ ACCEL_FS_ESP6,
+ ACCEL_FS_ESP_NUM_TYPES,
+};
+
+struct mlx5e_ipsec_rx_err {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_modify_hdr *copy_modify_hdr;
+};
+
+struct mlx5e_accel_fs_esp_prot {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_destination default_dest;
+ struct mlx5e_ipsec_rx_err rx_err;
+ u32 refcnt;
+ struct mutex prot_mutex; /* protect ESP4/ESP6 protocol */
+};
+
+struct mlx5e_accel_fs_esp {
+ struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES];
+};
+
+struct mlx5e_ipsec_tx {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ struct mutex mutex; /* Protect IPsec TX steering */
+ u32 refcnt;
+};
+
+/* IPsec RX flow steering */
+static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+{
+ if (i == ACCEL_FS_ESP4)
+ return MLX5_TT_IPV4_IPSEC_ESP;
+ return MLX5_TT_IPV6_IPSEC_ESP;
+}
+
+static int rx_err_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_accel_fs_esp_prot *fs_prot,
+ struct mlx5e_ipsec_rx_err *rx_err)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_flow_handle *fte;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+	/* Action to copy the 7-bit ipsec_syndrome to regB[24:30] */
+ MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY);
+ MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME);
+ MLX5_SET(copy_action_in, action, src_offset, 0);
+ MLX5_SET(copy_action_in, action, length, 7);
+ MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ MLX5_SET(copy_action_in, action, dst_offset, 24);
+
+ modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
+ 1, action);
+
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ netdev_err(priv->netdev,
+ "fail to alloc ipsec copy modify_header_id err=%d\n", err);
+ goto out_spec;
+ }
+
+ /* create fte */
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.modify_hdr = modify_hdr;
+ fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act,
+ &fs_prot->default_dest, 1);
+ if (IS_ERR(fte)) {
+ err = PTR_ERR(fte);
+ netdev_err(priv->netdev, "fail to add ipsec rx err copy rule err=%d\n", err);
+ goto out;
+ }
+
+ kvfree(spec);
+ rx_err->rule = fte;
+ rx_err->copy_modify_hdr = modify_hdr;
+ return 0;
+
+out:
+ mlx5_modify_header_dealloc(mdev, modify_hdr);
+out_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int rx_fs_create(struct mlx5e_priv *priv,
+ struct mlx5e_accel_fs_esp_prot *fs_prot)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table *ft = fs_prot->ft;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_handle *miss_rule;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!flow_group_in || !spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Create miss_group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
+ miss_group = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(miss_group)) {
+ err = PTR_ERR(miss_group);
+ netdev_err(priv->netdev, "fail to create ipsec rx miss_group err=%d\n", err);
+ goto out;
+ }
+ fs_prot->miss_group = miss_group;
+
+ /* Create miss rule */
+ miss_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &fs_prot->default_dest, 1);
+ if (IS_ERR(miss_rule)) {
+ mlx5_destroy_flow_group(fs_prot->miss_group);
+ err = PTR_ERR(miss_rule);
+ netdev_err(priv->netdev, "fail to create ipsec rx miss_rule err=%d\n", err);
+ goto out;
+ }
+ fs_prot->miss_rule = miss_rule;
+out:
+ kvfree(flow_group_in);
+ kvfree(spec);
+ return err;
+}
+
+static void rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+
+ accel_esp = priv->ipsec->rx_fs;
+
+	/* The netdev unreg already happened, so all offloaded rules have already been removed */
+ fs_prot = &accel_esp->fs_prot[type];
+
+ mlx5_del_flow_rules(fs_prot->miss_rule);
+ mlx5_destroy_flow_group(fs_prot->miss_group);
+ mlx5_destroy_flow_table(fs_prot->ft);
+
+ mlx5_del_flow_rules(fs_prot->rx_err.rule);
+ mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
+ mlx5_destroy_flow_table(fs_prot->rx_err.ft);
+}
+
+static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(priv->fs, false);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ accel_esp = priv->ipsec->rx_fs;
+ fs_prot = &accel_esp->fs_prot[type];
+ fs_prot->default_dest =
+ mlx5_ttc_get_default_dest(ttc, fs_esp2tt(type));
+
+ ft_attr.max_fte = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ fs_prot->rx_err.ft = ft;
+ err = rx_err_add_rule(priv, fs_prot, &fs_prot->rx_err);
+ if (err)
+ goto err_add;
+
+ /* Create FT */
+ ft_attr.max_fte = NUM_IPSEC_FTE;
+ ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+ ft_attr.autogroup.num_reserved_entries = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_fs_ft;
+ }
+ fs_prot->ft = ft;
+
+ err = rx_fs_create(priv, fs_prot);
+ if (err)
+ goto err_fs;
+
+ return 0;
+
+err_fs:
+ mlx5_destroy_flow_table(fs_prot->ft);
+err_fs_ft:
+ mlx5_del_flow_rules(fs_prot->rx_err.rule);
+ mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
+err_add:
+ mlx5_destroy_flow_table(fs_prot->rx_err.ft);
+ return err;
+}
+
+static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_accel_fs_esp *accel_esp;
+ int err = 0;
+
+ accel_esp = priv->ipsec->rx_fs;
+ fs_prot = &accel_esp->fs_prot[type];
+ mutex_lock(&fs_prot->prot_mutex);
+ if (fs_prot->refcnt)
+ goto skip;
+
+ /* create FT */
+ err = rx_create(priv, type);
+ if (err)
+ goto out;
+
+ /* connect */
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = fs_prot->ft;
+ mlx5_ttc_fwd_dest(ttc, fs_esp2tt(type), &dest);
+
+skip:
+ fs_prot->refcnt++;
+out:
+ mutex_unlock(&fs_prot->prot_mutex);
+ return err;
+}
+
+static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+
+ accel_esp = priv->ipsec->rx_fs;
+ fs_prot = &accel_esp->fs_prot[type];
+ mutex_lock(&fs_prot->prot_mutex);
+ fs_prot->refcnt--;
+ if (fs_prot->refcnt)
+ goto out;
+
+ /* disconnect */
+ mlx5_ttc_fwd_default_dest(ttc, fs_esp2tt(type));
+
+ /* remove FT */
+ rx_destroy(priv, type);
+
+out:
+ mutex_unlock(&fs_prot->prot_mutex);
+}
+
+/* IPsec TX flow steering */
+static int tx_create(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ ft_attr.max_fte = NUM_IPSEC_FTE;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft = mlx5_create_auto_grouped_flow_table(ipsec->tx_fs->ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err);
+ return err;
+ }
+ ipsec->tx_fs->ft = ft;
+ return 0;
+}
+
+static int tx_ft_get(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+ int err = 0;
+
+ mutex_lock(&tx_fs->mutex);
+ if (tx_fs->refcnt)
+ goto skip;
+
+ err = tx_create(priv);
+ if (err)
+ goto out;
+skip:
+ tx_fs->refcnt++;
+out:
+ mutex_unlock(&tx_fs->mutex);
+ return err;
+}
+
+static void tx_ft_put(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+
+ mutex_lock(&tx_fs->mutex);
+ tx_fs->refcnt--;
+ if (tx_fs->refcnt)
+ goto out;
+
+ mlx5_destroy_flow_table(tx_fs->ft);
+out:
+ mutex_unlock(&tx_fs->mutex);
+}
+
+static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 ipsec_obj_id,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act)
+{
+ u8 ip_version = attrs->is_ipv6 ? 6 : 4;
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS;
+
+ /* ip_version */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ip_version);
+
+ /* Non fragmented */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.frag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.frag, 0);
+
+ /* ESP header */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP);
+
+ /* SPI number */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.outer_esp_spi, attrs->spi);
+
+ if (ip_version == 4) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &attrs->saddr.a4, 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &attrs->daddr.a4, 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &attrs->saddr.a6, 16);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &attrs->daddr.a6, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ }
+
+ flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC;
+ flow_act->crypto.obj_id = ipsec_obj_id;
+ flow_act->flags |= FLOW_ACT_NO_APPEND;
+}
+
+static int rx_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ u32 ipsec_obj_id = sa_entry->ipsec_obj_id;
+ struct mlx5_modify_hdr *modify_hdr = NULL;
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ enum accel_fs_esp_type type;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ accel_esp = priv->ipsec->rx_fs;
+ type = attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4;
+ fs_prot = &accel_esp->fs_prot[type];
+
+ err = rx_ft_get(priv, type);
+ if (err)
+ return err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
+
+	/* Set bit[31] as the IPsec marker */
+	/* Set bits[23:0] to ipsec_obj_id */
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ MLX5_SET(set_action_in, action, data, (ipsec_obj_id | BIT(31)));
+ MLX5_SET(set_action_in, action, offset, 0);
+ MLX5_SET(set_action_in, action, length, 32);
+
+ modify_hdr = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL,
+ 1, action);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ netdev_err(priv->netdev,
+ "fail to alloc ipsec set modify_header_id err=%d\n", err);
+ modify_hdr = NULL;
+ goto out_err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ flow_act.modify_hdr = modify_hdr;
+ dest.ft = fs_prot->rx_err.ft;
+ rule = mlx5_add_flow_rules(fs_prot->ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n",
+ attrs->action, err);
+ goto out_err;
+ }
+
+ ipsec_rule->rule = rule;
+ ipsec_rule->set_modify_hdr = modify_hdr;
+ goto out;
+
+out_err:
+ if (modify_hdr)
+ mlx5_modify_header_dealloc(priv->mdev, modify_hdr);
+ rx_ft_put(priv, type);
+
+out:
+ kvfree(spec);
+ return err;
+}
+
+static int tx_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ err = tx_ft_get(priv);
+ if (err)
+ return err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ setup_fte_common(&sa_entry->attrs, sa_entry->ipsec_obj_id, spec,
+ &flow_act);
+
+ /* Add IPsec indicator in metadata_reg_a */
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT;
+ rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n",
+ sa_entry->attrs.action, err);
+ goto out;
+ }
+
+ sa_entry->ipsec_rule.rule = rule;
+
+out:
+ kvfree(spec);
+ if (err)
+ tx_ft_put(priv);
+ return err;
+}
+
+int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+ return tx_add_rule(priv, sa_entry);
+
+ return rx_add_rule(priv, sa_entry);
+}
+
+void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+
+ mlx5_del_flow_rules(ipsec_rule->rule);
+
+ if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT) {
+ tx_ft_put(priv);
+ return;
+ }
+
+ mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr);
+ rx_ft_put(priv,
+ sa_entry->attrs.is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4);
+}
+
+void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
+{
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+ enum accel_fs_esp_type i;
+
+ if (!ipsec->rx_fs)
+ return;
+
+ mutex_destroy(&ipsec->tx_fs->mutex);
+ WARN_ON(ipsec->tx_fs->refcnt);
+ kfree(ipsec->tx_fs);
+
+ accel_esp = ipsec->rx_fs;
+ for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
+ fs_prot = &accel_esp->fs_prot[i];
+ mutex_destroy(&fs_prot->prot_mutex);
+ WARN_ON(fs_prot->refcnt);
+ }
+ kfree(ipsec->rx_fs);
+}
+
+int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
+{
+ struct mlx5e_accel_fs_esp_prot *fs_prot;
+ struct mlx5e_accel_fs_esp *accel_esp;
+ struct mlx5_flow_namespace *ns;
+ enum accel_fs_esp_type i;
+ int err = -ENOMEM;
+
+ ns = mlx5_get_flow_namespace(ipsec->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS_IPSEC);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ ipsec->tx_fs = kzalloc(sizeof(*ipsec->tx_fs), GFP_KERNEL);
+ if (!ipsec->tx_fs)
+ return -ENOMEM;
+
+ ipsec->rx_fs = kzalloc(sizeof(*ipsec->rx_fs), GFP_KERNEL);
+ if (!ipsec->rx_fs)
+ goto err_rx;
+
+ mutex_init(&ipsec->tx_fs->mutex);
+ ipsec->tx_fs->ns = ns;
+
+ accel_esp = ipsec->rx_fs;
+ for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
+ fs_prot = &accel_esp->fs_prot[i];
+ mutex_init(&fs_prot->prot_mutex);
+ }
+
+ return 0;
+
+err_rx:
+ kfree(ipsec->tx_fs);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
new file mode 100644
index 000000000..792724ce7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "ipsec.h"
+#include "lib/mlx5.h"
+
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ u32 caps = 0;
+
+ if (!MLX5_CAP_GEN(mdev, ipsec_offload))
+ return 0;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return 0;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
+ return 0;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt))
+ return 0;
+
+ if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) ||
+ !MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt))
+ return 0;
+
+ if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) &&
+ MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp))
+ caps |= MLX5_IPSEC_CAP_CRYPTO;
+
+ if (!caps)
+ return 0;
+
+ if (MLX5_CAP_IPSEC(mdev, ipsec_esn))
+ caps |= MLX5_IPSEC_CAP_ESN;
+
+ /* We can accommodate up to 2^24 different IPsec objects
+ * because we use up to 24 bit in flow table metadata
+ * to hold the IPsec Object unique handle.
+ */
+ WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24);
+ return caps;
+}
+EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps);
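The 24-bit limit noted in the comment above comes from the flow-table metadata layout used on the receive side, where the low 24 bits of the CQE metadata carry the IPsec object handle. A minimal, self-contained sketch of that bound, with a hypothetical capability value:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define IPSEC_HANDLE_MASK 0x00ffffffu /* bits 23..0 of the metadata word */

int main(void)
{
	unsigned int log_max_ipsec_offload = 20; /* hypothetical device capability */
	uint32_t max_objs;

	/* Mirrors the WARN_ON_ONCE() above: more than 2^24 handles cannot
	 * be encoded in the 24-bit metadata field.
	 */
	assert(log_max_ipsec_offload <= 24);

	max_objs = 1u << log_max_ipsec_offload;
	printf("up to %u IPsec objects, handle mask 0x%06x\n",
	       max_objs, IPSEC_HANDLE_MASK);
	return 0;
}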
+
+static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {};
+ void *obj, *salt_p, *salt_iv_p;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object);
+
+ /* salt and seq_iv */
+ salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt);
+ memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt));
+
+ MLX5_SET(ipsec_obj, obj, icv_length, MLX5_IPSEC_OBJECT_ICV_LEN_16B);
+ salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv);
+ memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv));
+ /* esn */
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+ MLX5_SET(ipsec_obj, obj, esn_en, 1);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
+ MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
+ }
+
+ MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id);
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ sa_entry->ipsec_obj_id =
+ MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void mlx5_destroy_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct aes_gcm_keymat *aes_gcm = &sa_entry->attrs.aes_gcm;
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ int err;
+
+ /* key */
+ err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key,
+ aes_gcm->key_len / BITS_PER_BYTE,
+ MLX5_ACCEL_OBJ_IPSEC_KEY,
+ &sa_entry->enc_key_id);
+ if (err) {
+ mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err);
+ return err;
+ }
+
+ err = mlx5_create_ipsec_obj(sa_entry);
+ if (err) {
+ mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err);
+ goto err_enc_key;
+ }
+
+ return 0;
+
+err_enc_key:
+ mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id);
+ return err;
+}
+
+void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+
+ mlx5_destroy_ipsec_obj(sa_entry);
+ mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id);
+}
+
+static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)];
+ u64 modify_field_select = 0;
+ u64 general_obj_types;
+ void *obj;
+ int err;
+
+ if (!(attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED))
+ return 0;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
+ return -EINVAL;
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n",
+ sa_entry->ipsec_obj_id, err);
+ return err;
+ }
+
+ obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object);
+ modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select);
+
+ /* esn */
+ if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) ||
+ !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB))
+ return -EOPNOTSUPP;
+
+ obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object);
+ MLX5_SET64(ipsec_obj, obj, modify_field_select,
+ MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP |
+ MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB);
+ MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
+ if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
+ MLX5_SET(ipsec_obj, obj, esn_overlap, 1);
+
+ /* general object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ int err;
+
+ err = mlx5_modify_ipsec_obj(sa_entry, attrs);
+ if (err)
+ return;
+
+ memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
new file mode 100644
index 000000000..c4a84f0a3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/aead.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include "ipsec.h"
+#include "ipsec_rxtx.h"
+#include "en.h"
+
+enum {
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+ unsigned int alen = crypto_aead_authsize(x->data);
+ struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+ struct iphdr *ipv4hdr = ip_hdr(skb);
+ unsigned int trailer_len;
+ u8 plen;
+ int ret;
+
+ ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+ if (unlikely(ret))
+ return ret;
+
+ trailer_len = alen + plen + 2;
+
+ ret = pskb_trim(skb, skb->len - trailer_len);
+ if (unlikely(ret))
+ return ret;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+ ip_send_check(ipv4hdr);
+ } else {
+ ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+ trailer_len);
+ }
+ return 0;
+}
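The arithmetic above strips the ESP trailer (padding, pad-length byte, next-header byte and ICV) after hardware decryption; the pad-length byte is read from offset len - alen - 2. A standalone sketch of the same computation, with hypothetical packet and ICV sizes:

#include <stdio.h>

/* ESP trailer is <padding><pad length><next header><ICV>, so the bytes
 * to strip are alen + plen + 2, matching the helper above.
 */
int main(void)
{
	unsigned int pkt_len = 1400;	/* decrypted packet length (hypothetical) */
	unsigned int alen = 16;		/* ICV size, e.g. AES-GCM-128 */
	unsigned int plen = 2;		/* value of the pad-length byte */
	unsigned int trailer_len = alen + plen + 2;

	printf("trim %u bytes -> new length %u\n",
	       trailer_len, pkt_len - trailer_len);
	return 0;
}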
+
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, u8 mode,
+ struct xfrm_offload *xo)
+{
+ /* Tunnel Mode:
+ * SWP: OutL3 InL3 InL4
+ * Pkt: MAC IP ESP IP L4
+ *
+ * Transport Mode:
+ * SWP: OutL3 OutL4
+ * Pkt: MAC IP ESP L4
+ *
+ * Tunnel(VXLAN TCP/UDP) over Transport Mode
+ * SWP: OutL3 InL3 InL4
+ * Pkt: MAC IP ESP UDP VXLAN IP L4
+ */
+
+ /* Shared settings */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (skb->protocol == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+ /* Tunnel mode */
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+
+ switch (xo->inner_ipproto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ /* IP | ESP | IP | [TCP | UDP] */
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ default:
+ break;
+ }
+ return;
+ }
+
+ /* Transport mode */
+ if (mode != XFRM_MODE_TRANSPORT)
+ return;
+
+ if (!xo->inner_ipproto) {
+ switch (xo->proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ /* IP | ESP | TCP */
+ eseg->swp_outer_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ default:
+ break;
+ }
+ } else {
+ /* Tunnel(VXLAN TCP/UDP) over Transport Mode */
+ switch (xo->inner_ipproto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ eseg->swp_inner_l4_offset =
+ (skb->csum_start + skb->head - skb->data) / 2;
+ if (inner_ip_hdr(skb)->version == 6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ break;
+ default:
+ break;
+ }
+ }
+
+}
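The SWP offsets programmed above are byte offsets divided by two, counted from the start of the frame. A minimal sketch for a hypothetical transport-mode IPv4 packet (MAC | IP | ESP | TCP), with assumed header sizes:

#include <stdint.h>
#include <stdio.h>

/* Offsets in 2-byte units: 14-byte Ethernet header, 20-byte IPv4 header
 * and a 16-byte ESP header (SPI + sequence + 8-byte IV) are assumptions
 * for illustration only.
 */
int main(void)
{
	unsigned int mac_len = 14, ip_len = 20, esp_len = 16;
	uint8_t swp_outer_l3 = mac_len / 2;
	uint8_t swp_outer_l4 = (mac_len + ip_len + esp_len) / 2;

	printf("swp_outer_l3_offset=%u swp_outer_l4_offset=%u\n",
	       swp_outer_l3, swp_outer_l4);
	return 0;
}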
+
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ __u32 oseq = replay_esn->oseq;
+ int iv_offset;
+ __be64 seqno;
+ u32 seq_hi;
+
+ if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID &&
+ MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) {
+ seq_hi = xo->seq.hi - 1;
+ } else {
+ seq_hi = xo->seq.hi;
+ }
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ int iv_offset;
+ __be64 seqno;
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
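Both IV helpers place the full 64-bit sequence number, (seq_hi << 32) | seq_lo, big-endian into the 8 IV bytes that follow the ESP header. A self-contained sketch with hypothetical sequence numbers:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* htobe64() is the glibc counterpart of the kernel's cpu_to_be64();
 * the byte dump shows the wire order regardless of host endianness.
 */
int main(void)
{
	uint32_t seq_lo = 0x00000010, seq_hi = 0x00000001;
	uint64_t seqno_be = htobe64(((uint64_t)seq_hi << 32) | seq_lo);
	const unsigned char *p = (const unsigned char *)&seqno_be;
	int i;

	for (i = 0; i < 8; i++)
		printf("%02x", p[i]);	/* prints 0000000100000010 */
	printf("\n");
	return 0;
}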
+
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg)
+{
+ inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG);
+ esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto);
+}
+
+static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ struct xfrm_state *x,
+ struct xfrm_offload *xo,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ unsigned int blksize, clen, alen, plen;
+ struct crypto_aead *aead;
+ unsigned int tailen;
+
+ ipsec_st->x = x;
+ ipsec_st->xo = xo;
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ clen = ALIGN(skb->len + 2, blksize);
+ plen = max_t(u32, clen - skb->len, 4);
+ tailen = plen + alen;
+ ipsec_st->plen = plen;
+ ipsec_st->tailen = tailen;
+
+ return 0;
+}
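The state setup above precomputes the ESP tail: the payload plus two trailer bytes is rounded up to the cipher block size, at least four tail bytes are reserved, and the ICV length is added. A standalone version of the same math, with hypothetical sizes:

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

/* blksize of 4 matches ALIGN(crypto_aead_blocksize(), 4) for AES-GCM,
 * whose block size is 1; alen of 16 is the GCM ICV length.
 */
int main(void)
{
	unsigned int len = 1453, blksize = 4, alen = 16;
	unsigned int clen = ALIGN_UP(len + 2, blksize);
	unsigned int plen = clen - len > 4 ? clen - len : 4;

	printf("plen=%u tailen=%u\n", plen, plen + alen);
	return 0;
}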
+
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_encap_tmpl *encap;
+ struct xfrm_state *x;
+ struct sec_path *sp;
+ u8 l3_proto;
+
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1))
+ return;
+
+ x = xfrm_input_state(skb);
+ if (unlikely(!x))
+ return;
+
+ if (unlikely(!x->xso.offload_handle ||
+ (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))))
+ return;
+
+ mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
+
+ l3_proto = (x->props.family == AF_INET) ?
+ ((struct iphdr *)skb_network_header(skb))->protocol :
+ ((struct ipv6hdr *)skb_network_header(skb))->nexthdr;
+
+ eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+ eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
+ encap = x->encap;
+ if (!encap) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
+ } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
+ }
+}
+
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_state *x;
+ struct sec_path *sp;
+
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
+ goto drop;
+ }
+
+ x = xfrm_input_state(skb);
+ if (unlikely(!x)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state);
+ goto drop;
+ }
+
+ if (unlikely(!x->xso.offload_handle ||
+ (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6)))) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
+ goto drop;
+ }
+
+ if (!skb_is_gso(skb))
+ if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
+ goto drop;
+ }
+
+ sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ sa_entry->set_iv_op(skb, x, xo);
+ mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st);
+
+ return true;
+
+drop:
+ kfree_skb(skb);
+ return false;
+}
+
+enum {
+ MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED,
+ MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED,
+ MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER,
+};
+
+void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{
+ u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_priv *priv;
+ struct xfrm_offload *xo;
+ struct xfrm_state *xs;
+ struct sec_path *sp;
+ u32 sa_handle;
+
+ sa_handle = MLX5_IPSEC_METADATA_HANDLE(ipsec_meta_data);
+ priv = netdev_priv(netdev);
+ sp = secpath_set(skb);
+ if (unlikely(!sp)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
+ return;
+ }
+
+ xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
+ if (unlikely(!xs)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
+ return;
+ }
+
+ sp->xvec[sp->len++] = xs;
+ sp->olen++;
+
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+
+ switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
+ case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
+ xo->status = CRYPTO_SUCCESS;
+ break;
+ case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED:
+ xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
+ break;
+ case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER:
+ xo->status = CRYPTO_INVALID_PACKET_SYNTAX;
+ break;
+ default:
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
new file mode 100644
index 000000000..1878a70b9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_RXTX_H__
+#define __MLX5E_IPSEC_RXTX_H__
+
+#include <linux/skbuff.h>
+#include <net/xfrm.h>
+#include "en.h"
+#include "en/txrx.h"
+
+/* Bit31: IPsec marker, Bit30: reserved, Bit29-24: IPsec syndrome, Bit23-0: IPsec obj id */
+#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1)
+#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0))
+#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0))
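A small sketch decoding a hypothetical metadata word with the layout documented above (bit 31 marker, bits 29-24 syndrome, bits 23-0 object id), mirroring the three macros:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t metadata = 0x80000123; /* marker set, syndrome 0, handle 0x123 */
	uint32_t marker   = (metadata >> 31) & 0x1;
	uint32_t syndrome = (metadata >> 24) & 0x3f;	/* bits 29..24 */
	uint32_t handle   = metadata & 0xffffff;	/* bits 23..0 */

	printf("marker=%u syndrome=%u handle=0x%x\n", marker, syndrome, handle);
	return 0;
}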
+
+struct mlx5e_accel_tx_ipsec_state {
+ struct xfrm_offload *xo;
+ struct xfrm_state *x;
+ u32 tailen;
+ u32 plen;
+};
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+void mlx5e_ipsec_inverse_table_init(void);
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st);
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg);
+void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe);
+static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ return ipsec_st->tailen;
+}
+
+static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
+{
+ return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
+}
+
+static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
+{
+ return eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+}
+
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg);
+
+static inline netdev_features_t
+mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct sec_path *sp = skb_sec_path(skb);
+
+ if (sp && sp->len && xo) {
+ struct xfrm_state *x = sp->xvec[0];
+
+ if (!x || !x->xso.offload_handle)
+ goto out_disable;
+
+ if (xo->inner_ipproto) {
+ /* Cannot support tunnel packet over IPsec tunnel mode
+ * because we cannot offload three IP header csum
+ */
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ goto out_disable;
+
+ /* Only support UDP or TCP L4 checksum */
+ if (xo->inner_ipproto != IPPROTO_UDP &&
+ xo->inner_ipproto != IPPROTO_TCP)
+ goto out_disable;
+ }
+
+ return features;
+
+ }
+
+ /* Disable CSUM and GSO for software IPsec */
+out_disable:
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static inline bool
+mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ u8 inner_ipproto;
+
+ if (!mlx5e_ipsec_eseg_meta(eseg))
+ return false;
+
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ inner_ipproto = xfrm_offload(skb)->inner_ipproto;
+ if (inner_ipproto) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+ if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP)
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+ } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial_inner++;
+ }
+
+ return true;
+}
+#else
+static inline
+void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{}
+
+static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
+{
+ return false;
+}
+
+static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; }
+static inline netdev_features_t
+mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
+{ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); }
+
+static inline bool
+mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ return false;
+}
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
new file mode 100644
index 000000000..9de84821d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "ipsec.h"
+
+static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw)
+{
+ return priv->ipsec ? NUM_IPSEC_SW_COUNTERS : 0;
+}
+
+static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_sw) {}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_sw)
+{
+ unsigned int i;
+
+ if (priv->ipsec)
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_ipsec_sw_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw)
+{
+ int i;
+
+ if (priv->ipsec)
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+ mlx5e_ipsec_sw_stats_desc, i);
+ return idx;
+}
+
+MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
new file mode 100644
index 000000000..da2184c94
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "en.h"
+#include "lib/mlx5.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_utils.h"
+#include "en_accel/fs_tcp.h"
+
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id)
+{
+ u32 sz_bytes;
+ void *key;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ return mlx5_create_encryption_key(mdev, key, sz_bytes,
+ MLX5_ACCEL_OBJ_TLS_KEY,
+ p_key_id);
+}
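The dispatch above differs only in the key length passed to the DEK creation command: 16 bytes for AES-GCM-128 and 32 bytes for AES-GCM-256. A minimal sketch of that selection, using stand-in enum values rather than the real TLS_CIPHER_* constants:

#include <stdio.h>

enum cipher { AES_GCM_128, AES_GCM_256 }; /* stand-ins for TLS_CIPHER_* */

static int key_len(enum cipher c)
{
	switch (c) {
	case AES_GCM_128: return 16;
	case AES_GCM_256: return 32;
	}
	return -1; /* unsupported: the driver returns -EINVAL */
}

int main(void)
{
	printf("GCM-128 key: %d bytes, GCM-256 key: %d bytes\n",
	       key_len(AES_GCM_128), key_len(AES_GCM_256));
	return 0;
}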
+
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ mlx5_destroy_encryption_key(mdev, key_id);
+}
+
+static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!mlx5e_ktls_type_check(mdev, crypto_info))
+ return -EOPNOTSUPP;
+
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+ err = mlx5e_ktls_add_tx(netdev, sk, crypto_info, start_offload_tcp_sn);
+ else
+ err = mlx5e_ktls_add_rx(netdev, sk, crypto_info, start_offload_tcp_sn);
+
+ return err;
+}
+
+static void mlx5e_ktls_del(struct net_device *netdev,
+ struct tls_context *tls_ctx,
+ enum tls_offload_ctx_dir direction)
+{
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+ mlx5e_ktls_del_tx(netdev, tls_ctx);
+ else
+ mlx5e_ktls_del_rx(netdev, tls_ctx);
+}
+
+static int mlx5e_ktls_resync(struct net_device *netdev,
+ struct sock *sk, u32 seq, u8 *rcd_sn,
+ enum tls_offload_ctx_dir direction)
+{
+ if (unlikely(direction != TLS_OFFLOAD_CTX_DIR_RX))
+ return -EOPNOTSUPP;
+
+ mlx5e_ktls_rx_resync(netdev, sk, seq, rcd_sn);
+ return 0;
+}
+
+static const struct tlsdev_ops mlx5e_ktls_ops = {
+ .tls_dev_add = mlx5e_ktls_add,
+ .tls_dev_del = mlx5e_ktls_del,
+ .tls_dev_resync = mlx5e_ktls_resync,
+};
+
+bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+ u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
+
+ if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx))
+ return false;
+
+ /* Check the possibility to post the required ICOSQ WQEs. */
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS))
+ return false;
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS))
+ return false;
+ if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_KTLS_GET_PROGRESS_WQEBBS))
+ return false;
+
+ return true;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev))
+ return;
+
+ if (mlx5e_is_ktls_tx(mdev)) {
+ netdev->hw_features |= NETIF_F_HW_TLS_TX;
+ netdev->features |= NETIF_F_HW_TLS_TX;
+ }
+
+ if (mlx5e_is_ktls_rx(mdev))
+ netdev->hw_features |= NETIF_F_HW_TLS_RX;
+
+ netdev->tlsdev_ops = &mlx5e_ktls_ops;
+}
+
+int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+ if (enable)
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
+ else
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ if (!mlx5e_is_ktls_rx(priv->mdev))
+ return 0;
+
+ priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx");
+ if (!priv->tls->rx_wq)
+ return -ENOMEM;
+
+ if (priv->netdev->features & NETIF_F_HW_TLS_RX) {
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
+ if (err) {
+ destroy_workqueue(priv->tls->rx_wq);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_rx(priv->mdev))
+ return;
+
+ if (priv->netdev->features & NETIF_F_HW_TLS_RX)
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
+
+ destroy_workqueue(priv->tls->rx_wq);
+}
+
+int mlx5e_ktls_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls;
+
+ if (!mlx5e_is_ktls_device(priv->mdev))
+ return 0;
+
+ tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+ if (!tls)
+ return -ENOMEM;
+
+ priv->tls = tls;
+ return 0;
+}
+
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv)
+{
+ kfree(priv->tls);
+ priv->tls = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
new file mode 100644
index 000000000..1c35045e4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_KTLS_H__
+#define __MLX5E_KTLS_H__
+
+#include <linux/tls.h>
+#include <net/tls.h>
+#include "en.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
+
+static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+ if (is_kdump_kernel())
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ return (MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128) ||
+ MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256));
+}
+
+static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info)
+{
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256);
+ break;
+ }
+
+ return false;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv);
+int mlx5e_ktls_init_rx(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv);
+int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable);
+struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void);
+void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list);
+
+static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev)
+{
+ return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx);
+}
+
+bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev);
+
+struct mlx5e_tls_sw_stats {
+ atomic64_t tx_tls_ctx;
+ atomic64_t tx_tls_del;
+ atomic64_t tx_tls_pool_alloc;
+ atomic64_t tx_tls_pool_free;
+ atomic64_t rx_tls_ctx;
+ atomic64_t rx_tls_del;
+};
+
+struct mlx5e_tls {
+ struct mlx5e_tls_sw_stats sw_stats;
+ struct workqueue_struct *rx_wq;
+ struct mlx5e_tls_tx_pool *tx_pool;
+};
+
+int mlx5e_ktls_init(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv);
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
+{
+ netdev_warn(netdev, "kTLS is not supported\n");
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {}
+
+static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+ return false;
+}
+
+static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ return 0;
+}
+
+static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ return 0;
+}
+#endif
+
+#endif /* __MLX5E_KTLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
new file mode 100644
index 000000000..3e5483474
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <net/inet6_hashtables.h>
+#include "en_accel/en_accel.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
+#include "en_accel/ktls_utils.h"
+#include "en_accel/fs_tcp.h"
+
+struct accel_rule {
+ struct work_struct work;
+ struct mlx5e_priv *priv;
+ struct mlx5_flow_handle *rule;
+};
+
+#define PROGRESS_PARAMS_WRITE_UNIT 64
+#define PROGRESS_PARAMS_PADDED_SIZE \
+ (ALIGN(sizeof(struct mlx5_wqe_tls_progress_params_seg), \
+ PROGRESS_PARAMS_WRITE_UNIT))
+
+struct mlx5e_ktls_rx_resync_buf {
+ union {
+ struct mlx5_wqe_tls_progress_params_seg progress;
+ u8 pad[PROGRESS_PARAMS_PADDED_SIZE];
+ } ____cacheline_aligned_in_smp;
+ dma_addr_t dma_addr;
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+};
+
+enum {
+ MLX5E_PRIV_RX_FLAG_DELETING,
+ MLX5E_NUM_PRIV_RX_FLAGS,
+};
+
+struct mlx5e_ktls_rx_resync_ctx {
+ struct tls_offload_resync_async core;
+ struct work_struct work;
+ struct mlx5e_priv *priv;
+ refcount_t refcnt;
+ __be64 sw_rcd_sn_be;
+ u32 seq;
+};
+
+struct mlx5e_ktls_offload_context_rx {
+ union mlx5e_crypto_info crypto_info;
+ struct accel_rule rule;
+ struct sock *sk;
+ struct mlx5e_rq_stats *rq_stats;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ struct completion add_ctx;
+ struct mlx5e_tir tir;
+ u32 key_id;
+ u32 rxq;
+ DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS);
+
+ /* resync */
+ spinlock_t lock; /* protects resync fields */
+ struct mlx5e_ktls_rx_resync_ctx resync;
+ struct list_head list;
+};
+
+static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ if (!refcount_dec_and_test(&priv_rx->resync.refcnt))
+ return false;
+
+ kfree(priv_rx);
+ return true;
+}
+
+static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ refcount_inc(&priv_rx->resync.refcnt);
+}
+
+struct mlx5e_ktls_resync_resp {
+ /* protects list changes */
+ spinlock_t lock;
+ struct list_head list;
+};
+
+void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list)
+{
+ kvfree(resp_list);
+}
+
+struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void)
+{
+ struct mlx5e_ktls_resync_resp *resp_list;
+
+ resp_list = kvzalloc(sizeof(*resp_list), GFP_KERNEL);
+ if (!resp_list)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&resp_list->list);
+ spin_lock_init(&resp_list->lock);
+
+ return resp_list;
+}
+
+static void accel_rule_handle_work(struct work_struct *work)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct accel_rule *accel_rule;
+ struct mlx5_flow_handle *rule;
+
+ accel_rule = container_of(work, struct accel_rule, work);
+ priv_rx = container_of(accel_rule, struct mlx5e_ktls_offload_context_rx, rule);
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
+ goto out;
+
+ rule = mlx5e_accel_fs_add_sk(accel_rule->priv->fs, priv_rx->sk,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ MLX5_FS_DEFAULT_FLOW_TAG);
+ if (!IS_ERR_OR_NULL(rule))
+ accel_rule->rule = rule;
+out:
+ complete(&priv_rx->add_ctx);
+}
+
+static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv)
+{
+ INIT_WORK(&rule->work, accel_rule_handle_work);
+ rule->priv = priv;
+}
+
+static void icosq_fill_wi(struct mlx5e_icosq *sq, u16 pi,
+ struct mlx5e_icosq_wqe_info *wi)
+{
+ sq->db.wqe_info[pi] = *wi;
+}
+
+static struct mlx5_wqe_ctrl_seg *
+post_static_params(struct mlx5e_icosq *sq,
+ struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ struct mlx5e_set_tls_static_params_wqe *wqe;
+ struct mlx5e_icosq_wqe_info wi;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS;
+ if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs)))
+ return ERR_PTR(-ENOSPC);
+
+ pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ priv_rx->key_id, priv_rx->resync.seq, false,
+ TLS_OFFLOAD_CTX_DIR_RX);
+ wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS,
+ .num_wqebbs = num_wqebbs,
+ .tls_set_params.priv_rx = priv_rx,
+ };
+ icosq_fill_wi(sq, pi, &wi);
+ sq->pc += num_wqebbs;
+
+ return &wqe->ctrl;
+}
+
+static struct mlx5_wqe_ctrl_seg *
+post_progress_params(struct mlx5e_icosq *sq,
+ struct mlx5e_ktls_offload_context_rx *priv_rx,
+ u32 next_record_tcp_sn)
+{
+ struct mlx5e_set_tls_progress_params_wqe *wqe;
+ struct mlx5e_icosq_wqe_info wi;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS;
+ if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs)))
+ return ERR_PTR(-ENOSPC);
+
+ pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn,
+ mlx5e_tir_get_tirn(&priv_rx->tir),
+ false, next_record_tcp_sn,
+ TLS_OFFLOAD_CTX_DIR_RX);
+ wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_TLS,
+ .num_wqebbs = num_wqebbs,
+ .tls_set_params.priv_rx = priv_rx,
+ };
+
+ icosq_fill_wi(sq, pi, &wi);
+ sq->pc += num_wqebbs;
+
+ return &wqe->ctrl;
+}
+
+static int post_rx_param_wqes(struct mlx5e_channel *c,
+ struct mlx5e_ktls_offload_context_rx *priv_rx,
+ u32 next_record_tcp_sn)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_icosq *sq;
+ int err;
+
+ err = 0;
+ sq = &c->async_icosq;
+ spin_lock_bh(&c->async_icosq_lock);
+
+ cseg = post_static_params(sq, priv_rx);
+ if (IS_ERR(cseg))
+ goto err_out;
+ cseg = post_progress_params(sq, priv_rx, next_record_tcp_sn);
+ if (IS_ERR(cseg))
+ goto err_out;
+
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+unlock:
+ spin_unlock_bh(&c->async_icosq_lock);
+
+ return err;
+
+err_out:
+ priv_rx->rq_stats->tls_resync_req_skip++;
+ err = PTR_ERR(cseg);
+ complete(&priv_rx->add_ctx);
+ goto unlock;
+}
+
+static void
+mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ struct mlx5e_ktls_offload_context_rx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
+
+ BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX);
+
+ *ctx = priv_rx;
+}
+
+static struct mlx5e_ktls_offload_context_rx *
+mlx5e_get_ktls_rx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_rx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
+
+ return *ctx;
+}
+
+/* Re-sync */
+/* Runs in work context */
+static int
+resync_post_get_progress_params(struct mlx5e_icosq *sq,
+ struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ struct mlx5e_get_tls_progress_params_wqe *wqe;
+ struct mlx5e_ktls_rx_resync_buf *buf;
+ struct mlx5e_icosq_wqe_info wi;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_seg_get_psv *psv;
+ struct device *pdev;
+ int err;
+ u16 pi;
+
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+ if (unlikely(!buf)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ pdev = mlx5_core_dma_dev(sq->channel->priv->mdev);
+ buf->dma_addr = dma_map_single(pdev, &buf->progress,
+ PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ buf->priv_rx = priv_rx;
+
+ spin_lock_bh(&sq->channel->async_icosq_lock);
+
+ if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) {
+ spin_unlock_bh(&sq->channel->async_icosq_lock);
+ err = -ENOSPC;
+ goto err_dma_unmap;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS);
+ wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi);
+
+#define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS))
+
+ cseg = &wqe->ctrl;
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_GET_PSV |
+ (MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS << 24));
+ cseg->qpn_ds =
+ cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | GET_PSV_DS_CNT);
+
+ psv = &wqe->psv;
+ psv->num_psv = 1 << 4;
+ psv->l_key = sq->channel->mkey_be;
+ psv->psv_index[0] = cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir));
+ psv->va = cpu_to_be64(buf->dma_addr);
+
+ wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS,
+ .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS,
+ .tls_get_params.buf = buf,
+ };
+ icosq_fill_wi(sq, pi, &wi);
+ sq->pc++;
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ spin_unlock_bh(&sq->channel->async_icosq_lock);
+
+ return 0;
+
+err_dma_unmap:
+ dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+err_free:
+ kfree(buf);
+err_out:
+ priv_rx->rq_stats->tls_resync_req_skip++;
+ return err;
+}
+
+/* Function is called with elevated refcount.
+ * It decreases it only if no WQE is posted.
+ */
+static void resync_handle_work(struct work_struct *work)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct mlx5e_channel *c;
+ struct mlx5e_icosq *sq;
+
+ resync = container_of(work, struct mlx5e_ktls_rx_resync_ctx, work);
+ priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync);
+
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
+ mlx5e_ktls_priv_rx_put(priv_rx);
+ return;
+ }
+
+ c = resync->priv->channels.c[priv_rx->rxq];
+ sq = &c->async_icosq;
+
+ if (resync_post_get_progress_params(sq, priv_rx))
+ mlx5e_ktls_priv_rx_put(priv_rx);
+}
+
+static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
+ struct mlx5e_priv *priv)
+{
+ INIT_WORK(&resync->work, resync_handle_work);
+ resync->priv = priv;
+ refcount_set(&resync->refcnt, 1);
+}
+
+/* Function can be called with the refcount being either elevated or not.
+ * It does not affect the refcount.
+ */
+static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx,
+ struct mlx5e_channel *c)
+{
+ struct mlx5e_ktls_resync_resp *ktls_resync;
+ struct mlx5e_icosq *sq;
+ bool trigger_poll;
+
+ sq = &c->async_icosq;
+ ktls_resync = sq->ktls_resync;
+ trigger_poll = false;
+
+ spin_lock_bh(&ktls_resync->lock);
+ spin_lock_bh(&priv_rx->lock);
+ switch (priv_rx->crypto_info.crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ &priv_rx->crypto_info.crypto_info_128;
+
+ memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be,
+ sizeof(info->rec_seq));
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ &priv_rx->crypto_info.crypto_info_256;
+
+ memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be,
+ sizeof(info->rec_seq));
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ priv_rx->crypto_info.crypto_info.cipher_type);
+ spin_unlock_bh(&priv_rx->lock);
+ spin_unlock_bh(&ktls_resync->lock);
+ return;
+ }
+
+ if (list_empty(&priv_rx->list)) {
+ list_add_tail(&priv_rx->list, &ktls_resync->list);
+ trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+ }
+ spin_unlock_bh(&priv_rx->lock);
+ spin_unlock_bh(&ktls_resync->lock);
+
+ if (!trigger_poll)
+ return;
+
+ if (!napi_if_scheduled_mark_missed(&c->napi)) {
+ spin_lock_bh(&c->async_icosq_lock);
+ mlx5e_trigger_irq(sq);
+ spin_unlock_bh(&c->async_icosq_lock);
+ }
+}
+
+/* Function can be called with the refcount being either elevated or not.
+ * It decreases the refcount and may free the kTLS priv context.
+ * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was
+ * already in flight.
+ */
+void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
+ struct mlx5e_icosq *sq)
+{
+ struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf;
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ u8 tracker_state, auth_state, *ctx;
+ struct device *dev;
+ u32 hw_seq;
+
+ priv_rx = buf->priv_rx;
+ resync = &priv_rx->resync;
+ dev = mlx5_core_dma_dev(resync->priv->mdev);
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
+ goto out;
+
+ dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE,
+ DMA_FROM_DEVICE);
+
+ ctx = buf->progress.ctx;
+ tracker_state = MLX5_GET(tls_progress_params, ctx, record_tracker_state);
+ auth_state = MLX5_GET(tls_progress_params, ctx, auth_state);
+ if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING ||
+ auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) {
+ priv_rx->rq_stats->tls_resync_req_skip++;
+ goto out;
+ }
+
+ hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn);
+ tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq));
+ priv_rx->rq_stats->tls_resync_req_end++;
+out:
+ mlx5e_ktls_priv_rx_put(priv_rx);
+ dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+ kfree(buf);
+}
+
+/* Runs in NAPI.
+ * Function elevates the refcount, unless no work is queued.
+ */
+static bool resync_queue_get_psv(struct sock *sk)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+
+ priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk));
+ if (unlikely(!priv_rx))
+ return false;
+
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
+ return false;
+
+ resync = &priv_rx->resync;
+ mlx5e_ktls_priv_rx_get(priv_rx);
+ if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
+ mlx5e_ktls_priv_rx_put(priv_rx);
+
+ return true;
+}
+
+/* Runs in NAPI */
+static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)(skb->data);
+ struct net_device *netdev = rq->netdev;
+ struct net *net = dev_net(netdev);
+ struct sock *sk = NULL;
+ unsigned int datalen;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ __be32 seq;
+ int depth = 0;
+
+ __vlan_get_protocol(skb, eth->h_proto, &depth);
+ iph = (struct iphdr *)(skb->data + depth);
+
+ if (iph->version == 4) {
+ depth += sizeof(struct iphdr);
+ th = (void *)iph + sizeof(struct iphdr);
+
+ sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->saddr, th->source, iph->daddr,
+ th->dest, netdev->ifindex);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
+
+ depth += sizeof(struct ipv6hdr);
+ th = (void *)ipv6h + sizeof(struct ipv6hdr);
+
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &ipv6h->saddr, th->source,
+ &ipv6h->daddr, ntohs(th->dest),
+ netdev->ifindex, 0);
+#endif
+ }
+
+ depth += sizeof(struct tcphdr);
+
+ if (unlikely(!sk))
+ return;
+
+ if (unlikely(sk->sk_state == TCP_TIME_WAIT))
+ goto unref;
+
+ if (unlikely(!resync_queue_get_psv(sk)))
+ goto unref;
+
+ seq = th->seq;
+ datalen = skb->len - depth;
+ tls_offload_rx_resync_async_request_start(sk, seq, datalen);
+ rq->stats->tls_resync_req_start++;
+
+unref:
+ sock_gen_put(sk);
+}
+
+void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk,
+ u32 seq, u8 *rcd_sn)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct mlx5e_priv *priv;
+ struct mlx5e_channel *c;
+
+ priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk));
+ if (unlikely(!priv_rx))
+ return;
+
+ resync = &priv_rx->resync;
+ resync->sw_rcd_sn_be = *(__be64 *)rcd_sn;
+ resync->seq = seq;
+
+ priv = netdev_priv(netdev);
+ c = priv->channels.c[priv_rx->rxq];
+
+ resync_handle_seq_match(priv_rx, c);
+}
+
+/* End of resync section */
+
+void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, u32 *cqe_bcnt)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ switch (get_cqe_tls_offload(cqe)) {
+ case CQE_TLS_OFFLOAD_DECRYPTED:
+ skb->decrypted = 1;
+ stats->tls_decrypted_packets++;
+ stats->tls_decrypted_bytes += *cqe_bcnt;
+ break;
+ case CQE_TLS_OFFLOAD_RESYNC:
+ stats->tls_resync_req_pkt++;
+ resync_update_sn(rq, skb);
+ break;
+ default: /* CQE_TLS_OFFLOAD_ERROR: */
+ stats->tls_err++;
+ break;
+ }
+}
+
+void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx = wi->tls_set_params.priv_rx;
+ struct accel_rule *rule = &priv_rx->rule;
+
+ if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
+ complete(&priv_rx->add_ctx);
+ return;
+ }
+ queue_work(rule->priv->tls->rx_wq, &rule->work);
+}
+
+static int mlx5e_ktls_sk_get_rxq(struct sock *sk)
+{
+ int rxq = sk_rx_queue_get(sk);
+
+ if (unlikely(rxq == -1))
+ rxq = 0;
+
+ return rxq;
+}
+
+int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct tls_context *tls_ctx;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+ int rxq, err;
+
+ tls_ctx = tls_get_ctx(sk);
+ priv = netdev_priv(netdev);
+ mdev = priv->mdev;
+ priv_rx = kzalloc(sizeof(*priv_rx), GFP_KERNEL);
+ if (unlikely(!priv_rx))
+ return -ENOMEM;
+
+ err = mlx5_ktls_create_key(mdev, crypto_info, &priv_rx->key_id);
+ if (err)
+ goto err_create_key;
+
+ INIT_LIST_HEAD(&priv_rx->list);
+ spin_lock_init(&priv_rx->lock);
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ priv_rx->crypto_info.crypto_info_128 =
+ *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ priv_rx->crypto_info.crypto_info_256 =
+ *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->cipher_type);
+ return -EOPNOTSUPP;
+ }
+
+ rxq = mlx5e_ktls_sk_get_rxq(sk);
+ priv_rx->rxq = rxq;
+ priv_rx->sk = sk;
+
+ priv_rx->rq_stats = &priv->channel_stats[rxq]->rq;
+ priv_rx->sw_stats = &priv->tls->sw_stats;
+ mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
+
+ err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir);
+ if (err)
+ goto err_create_tir;
+
+ init_completion(&priv_rx->add_ctx);
+
+ accel_rule_init(&priv_rx->rule, priv);
+ resync = &priv_rx->resync;
+ resync_init(resync, priv);
+ tls_offload_ctx_rx(tls_ctx)->resync_async = &resync->core;
+ tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC);
+
+ err = post_rx_param_wqes(priv->channels.c[rxq], priv_rx, start_offload_tcp_sn);
+ if (err)
+ goto err_post_wqes;
+
+ atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx);
+
+ return 0;
+
+err_post_wqes:
+ mlx5e_tir_destroy(&priv_rx->tir);
+err_create_tir:
+ mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
+err_create_key:
+ kfree(priv_rx);
+ return err;
+}
+
+void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ struct mlx5e_ktls_rx_resync_ctx *resync;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+ mdev = priv->mdev;
+
+ priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx);
+ set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags);
+ mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL);
+ synchronize_net(); /* Sync with NAPI */
+ if (!cancel_work_sync(&priv_rx->rule.work))
+ /* completion is needed, as the priv_rx in the add flow
+ * is maintained on the wqe info (wi), not on the socket.
+ */
+ wait_for_completion(&priv_rx->add_ctx);
+ resync = &priv_rx->resync;
+ if (cancel_work_sync(&resync->work))
+ mlx5e_ktls_priv_rx_put(priv_rx);
+
+ atomic64_inc(&priv_rx->sw_stats->rx_tls_del);
+ if (priv_rx->rule.rule)
+ mlx5e_accel_fs_del_sk(priv_rx->rule.rule);
+
+ mlx5e_tir_destroy(&priv_rx->tir);
+ mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
+ /* priv_rx should normally be freed here, but if there is an outstanding
+ * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is
+ * processed.
+ */
+ mlx5e_ktls_priv_rx_put(priv_rx);
+}
+
+bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
+{
+ struct mlx5e_ktls_offload_context_rx *priv_rx, *tmp;
+ struct mlx5e_ktls_resync_resp *ktls_resync;
+ struct mlx5_wqe_ctrl_seg *db_cseg;
+ struct mlx5e_icosq *sq;
+ LIST_HEAD(local_list);
+ int i, j;
+
+ sq = &c->async_icosq;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ ktls_resync = sq->ktls_resync;
+ db_cseg = NULL;
+ i = 0;
+
+ spin_lock(&ktls_resync->lock);
+ list_for_each_entry_safe(priv_rx, tmp, &ktls_resync->list, list) {
+ list_move(&priv_rx->list, &local_list);
+ if (++i == budget)
+ break;
+ }
+ if (list_empty(&ktls_resync->list))
+ clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+ spin_unlock(&ktls_resync->lock);
+
+ spin_lock(&c->async_icosq_lock);
+ for (j = 0; j < i; j++) {
+ struct mlx5_wqe_ctrl_seg *cseg;
+
+ priv_rx = list_first_entry(&local_list,
+ struct mlx5e_ktls_offload_context_rx,
+ list);
+ spin_lock(&priv_rx->lock);
+ cseg = post_static_params(sq, priv_rx);
+ if (IS_ERR(cseg)) {
+ spin_unlock(&priv_rx->lock);
+ break;
+ }
+ list_del_init(&priv_rx->list);
+ spin_unlock(&priv_rx->lock);
+ db_cseg = cseg;
+ }
+ if (db_cseg)
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg);
+ spin_unlock(&c->async_icosq_lock);
+
+ priv_rx->rq_stats->tls_resync_res_ok += j;
+
+ if (!list_empty(&local_list)) {
+ /* This happens only if ICOSQ is full.
+ * There is no need to mark busy or explicitly ask for a NAPI cycle,
+ * it will be triggered by the outstanding ICOSQ completions.
+ */
+ spin_lock(&ktls_resync->lock);
+ list_splice(&local_list, &ktls_resync->list);
+ set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+ spin_unlock(&ktls_resync->lock);
+ priv_rx->rq_stats->tls_resync_res_retry++;
+ }
+
+ return i == budget;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
new file mode 100644
index 000000000..7c1c0eb16
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "fpga/sdk.h"
+#include "en_accel/ktls.h"
+
+static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv)
+{
+ if (!priv->tls)
+ return 0;
+
+ return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc);
+}
+
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ unsigned int i, n, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ n = mlx5e_ktls_get_count(priv);
+
+ for (i = 0; i < n; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_ktls_sw_stats_desc[i].format);
+
+ return n;
+}
+
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ unsigned int i, n, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ n = mlx5e_ktls_get_count(priv);
+
+ for (i = 0; i < n; i++)
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+ mlx5e_ktls_sw_stats_desc,
+ i);
+
+ return n;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
new file mode 100644
index 000000000..2e0335246
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -0,0 +1,921 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
+#include "en_accel/ktls_utils.h"
+
+struct mlx5e_dump_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_data_seg data;
+};
+
+#define MLX5E_KTLS_DUMP_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB))
+
+static u8
+mlx5e_ktls_dumps_num_wqes(struct mlx5e_params *params, unsigned int nfrags,
+ unsigned int sync_len)
+{
+	/* Given the MTU and sync_len, calculate an upper bound on the
+	 * number of DUMP WQEs needed for the TX resync of a record.
+ */
+ return nfrags + DIV_ROUND_UP(sync_len, MLX5E_SW2HW_MTU(params, params->sw_mtu));
+}
+
+u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ u16 num_dumps, stop_room = 0;
+
+ if (!mlx5e_is_ktls_tx(mdev))
+ return 0;
+
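+	/* Worst case resync of a single record: static and progress params
+	 * WQEs, DUMP WQEs covering a full TLS record, and a trailing fence NOP.
+	 */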
+ num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
+
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
+ stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
+ stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS);
+ stop_room += 1; /* fence nop */
+
+ return stop_room;
+}
+
+static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc)
+{
+ MLX5_SET(tisc, tisc, tls_en, 1);
+ MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
+}
+
+static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+
+ mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx));
+
+ return mlx5_core_create_tis(mdev, in, tisn);
+}
+
+static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev,
+ struct mlx5_async_ctx *async_ctx,
+ u32 *out, int outlen,
+ mlx5_async_cbk_t callback,
+ struct mlx5_async_work *context)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+
+ mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx));
+ MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
+
+ return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in),
+ out, outlen, callback, context);
+}
+
+static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn,
+ struct mlx5_async_ctx *async_ctx,
+ u32 *out, int outlen,
+ mlx5_async_cbk_t callback,
+ struct mlx5_async_work *context)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+
+ return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in),
+ out, outlen, callback, context);
+}
+
+struct mlx5e_ktls_offload_context_tx {
+ /* fast path */
+ u32 expected_seq;
+ u32 tisn;
+ bool ctx_post_pending;
+ /* control / resync */
+ struct list_head list_node; /* member of the pool */
+ union mlx5e_crypto_info crypto_info;
+ struct tls_offload_context_tx *tx_ctx;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ u32 key_id;
+ u8 create_err : 1;
+};
+
+static void
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ struct mlx5e_ktls_offload_context_tx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
+
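+	/* Only a pointer to priv_tx is kept in the TLS driver state area;
+	 * the BUILD_BUG_ON below guarantees it fits.
+	 */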
+ BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX);
+
+ *ctx = priv_tx;
+}
+
+static struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_tx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
+
+ return *ctx;
+}
+
+/* struct for callback API management */
+struct mlx5e_async_ctx {
+ struct mlx5_async_work context;
+ struct mlx5_async_ctx async_ctx;
+ struct work_struct work;
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct completion complete;
+ int err;
+ union {
+ u32 out_create[MLX5_ST_SZ_DW(create_tis_out)];
+ u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)];
+ };
+};
+
+static struct mlx5e_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n)
+{
+ struct mlx5e_async_ctx *bulk_async;
+ int i;
+
+ bulk_async = kvcalloc(n, sizeof(struct mlx5e_async_ctx), GFP_KERNEL);
+ if (!bulk_async)
+ return NULL;
+
+ for (i = 0; i < n; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ mlx5_cmd_init_async_ctx(mdev, &async->async_ctx);
+ init_completion(&async->complete);
+ }
+
+ return bulk_async;
+}
+
+static void mlx5e_bulk_async_cleanup(struct mlx5e_async_ctx *bulk_async, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ mlx5_cmd_cleanup_async_ctx(&async->async_ctx);
+ }
+ kvfree(bulk_async);
+}
+
+static void create_tis_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5e_async_ctx *async =
+ container_of(context, struct mlx5e_async_ctx, context);
+ struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx;
+
+ if (status) {
+ async->err = status;
+ priv_tx->create_err = 1;
+ goto out;
+ }
+
+ priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn);
+out:
+ complete(&async->complete);
+}
+
+static void destroy_tis_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5e_async_ctx *async =
+ container_of(context, struct mlx5e_async_ctx, context);
+ struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx;
+
+ complete(&async->complete);
+ kfree(priv_tx);
+}
+
+static struct mlx5e_ktls_offload_context_tx *
+mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats,
+ struct mlx5e_async_ctx *async)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ int err;
+
+ priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL);
+ if (!priv_tx)
+ return ERR_PTR(-ENOMEM);
+
+ priv_tx->mdev = mdev;
+ priv_tx->sw_stats = sw_stats;
+
+ if (!async) {
+ err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn);
+ if (err)
+ goto err_out;
+ } else {
+ async->priv_tx = priv_tx;
+ err = mlx5e_ktls_create_tis_cb(mdev, &async->async_ctx,
+ async->out_create, sizeof(async->out_create),
+ create_tis_callback, &async->context);
+ if (err)
+ goto err_out;
+ }
+
+ return priv_tx;
+
+err_out:
+ kfree(priv_tx);
+ return ERR_PTR(err);
+}
+
+static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ struct mlx5e_async_ctx *async)
+{
+ if (priv_tx->create_err) {
+ complete(&async->complete);
+ kfree(priv_tx);
+ return;
+ }
+ async->priv_tx = priv_tx;
+ mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn,
+ &async->async_ctx,
+ async->out_destroy, sizeof(async->out_destroy),
+ destroy_tis_callback, &async->context);
+}
+
+static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev,
+ struct list_head *list, int size)
+{
+ struct mlx5e_ktls_offload_context_tx *obj, *n;
+ struct mlx5e_async_ctx *bulk_async;
+ int i;
+
+ bulk_async = mlx5e_bulk_async_init(mdev, size);
+ if (!bulk_async)
+ return;
+
+ i = 0;
+ list_for_each_entry_safe(obj, n, list, list_node) {
+ mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]);
+ i++;
+ }
+
+ for (i = 0; i < size; i++) {
+ struct mlx5e_async_ctx *async = &bulk_async[i];
+
+ wait_for_completion(&async->complete);
+ }
+ mlx5e_bulk_async_cleanup(bulk_async, size);
+}
+
+/* Recycling pool API */
+
+#define MLX5E_TLS_TX_POOL_BULK (16)
+#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024)
+#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4)
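+/* The pool is refilled in bulks of MLX5E_TLS_TX_POOL_BULK once it drops to
+ * the LOW watermark, and trimmed back down once it grows to the HIGH one.
+ */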
+
+struct mlx5e_tls_tx_pool {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_tls_sw_stats *sw_stats;
+ struct mutex lock; /* Protects access to the pool */
+ struct list_head list;
+ size_t size;
+
+ struct workqueue_struct *wq;
+ struct work_struct create_work;
+ struct work_struct destroy_work;
+};
+
+static void create_work(struct work_struct *work)
+{
+ struct mlx5e_tls_tx_pool *pool =
+ container_of(work, struct mlx5e_tls_tx_pool, create_work);
+ struct mlx5e_ktls_offload_context_tx *obj;
+ struct mlx5e_async_ctx *bulk_async;
+ LIST_HEAD(local_list);
+ int i, j, err = 0;
+
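+	/* Create a bulk of TIS objects asynchronously, wait for all the
+	 * completions, then splice the new contexts into the pool under its
+	 * lock.
+	 */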
+ bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK);
+ if (!bulk_async)
+ return;
+
+ for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) {
+ obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async[i]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ break;
+ }
+ list_add(&obj->list_node, &local_list);
+ }
+
+ for (j = 0; j < i; j++) {
+ struct mlx5e_async_ctx *async = &bulk_async[j];
+
+ wait_for_completion(&async->complete);
+ if (!err && async->err)
+ err = async->err;
+ }
+ atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc);
+ mlx5e_bulk_async_cleanup(bulk_async, MLX5E_TLS_TX_POOL_BULK);
+ if (err)
+ goto err_out;
+
+ mutex_lock(&pool->lock);
+ if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) {
+ mutex_unlock(&pool->lock);
+ goto err_out;
+ }
+ list_splice(&local_list, &pool->list);
+ pool->size += MLX5E_TLS_TX_POOL_BULK;
+ if (pool->size <= MLX5E_TLS_TX_POOL_LOW)
+ queue_work(pool->wq, work);
+ mutex_unlock(&pool->lock);
+ return;
+
+err_out:
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i);
+ atomic64_add(i, &pool->sw_stats->tx_tls_pool_free);
+}
+
+static void destroy_work(struct work_struct *work)
+{
+ struct mlx5e_tls_tx_pool *pool =
+ container_of(work, struct mlx5e_tls_tx_pool, destroy_work);
+ struct mlx5e_ktls_offload_context_tx *obj;
+ LIST_HEAD(local_list);
+ int i = 0;
+
+ mutex_lock(&pool->lock);
+ if (pool->size < MLX5E_TLS_TX_POOL_HIGH) {
+ mutex_unlock(&pool->lock);
+ return;
+ }
+
+ list_for_each_entry(obj, &pool->list, list_node)
+ if (++i == MLX5E_TLS_TX_POOL_BULK)
+ break;
+
+ list_cut_position(&local_list, &pool->list, &obj->list_node);
+ pool->size -= MLX5E_TLS_TX_POOL_BULK;
+ if (pool->size >= MLX5E_TLS_TX_POOL_HIGH)
+ queue_work(pool->wq, work);
+ mutex_unlock(&pool->lock);
+
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK);
+ atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free);
+}
+
+static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev,
+ struct mlx5e_tls_sw_stats *sw_stats)
+{
+ struct mlx5e_tls_tx_pool *pool;
+
+ BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH);
+
+ pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
+
+ pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool");
+ if (!pool->wq)
+ goto err_free;
+
+ INIT_LIST_HEAD(&pool->list);
+ mutex_init(&pool->lock);
+
+ INIT_WORK(&pool->create_work, create_work);
+ INIT_WORK(&pool->destroy_work, destroy_work);
+
+ pool->mdev = mdev;
+ pool->sw_stats = sw_stats;
+
+ return pool;
+
+err_free:
+ kvfree(pool);
+ return NULL;
+}
+
+static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool)
+{
+ while (pool->size > MLX5E_TLS_TX_POOL_BULK) {
+ struct mlx5e_ktls_offload_context_tx *obj;
+ LIST_HEAD(local_list);
+ int i = 0;
+
+ list_for_each_entry(obj, &pool->list, list_node)
+ if (++i == MLX5E_TLS_TX_POOL_BULK)
+ break;
+
+ list_cut_position(&local_list, &pool->list, &obj->list_node);
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK);
+ atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free);
+ pool->size -= MLX5E_TLS_TX_POOL_BULK;
+ }
+ if (pool->size) {
+ mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &pool->list, pool->size);
+ atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free);
+ }
+}
+
+static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool)
+{
+ mlx5e_tls_tx_pool_list_cleanup(pool);
+ destroy_workqueue(pool->wq);
+ kvfree(pool);
+}
+
+static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj)
+{
+ mutex_lock(&pool->lock);
+ list_add(&obj->list_node, &pool->list);
+ if (++pool->size == MLX5E_TLS_TX_POOL_HIGH)
+ queue_work(pool->wq, &pool->destroy_work);
+ mutex_unlock(&pool->lock);
+}
+
+static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool)
+{
+ struct mlx5e_ktls_offload_context_tx *obj;
+
+ mutex_lock(&pool->lock);
+ if (unlikely(pool->size == 0)) {
+ /* pool is empty:
+ * - trigger the populating work, and
+ * - serve the current context via the regular blocking api.
+ */
+ queue_work(pool->wq, &pool->create_work);
+ mutex_unlock(&pool->lock);
+ obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL);
+ if (!IS_ERR(obj))
+ atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc);
+ return obj;
+ }
+
+ obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx,
+ list_node);
+ list_del(&obj->list_node);
+ if (--pool->size == MLX5E_TLS_TX_POOL_LOW)
+ queue_work(pool->wq, &pool->create_work);
+ mutex_unlock(&pool->lock);
+ return obj;
+}
+
+/* End of pool API */
+
+int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_tls_tx_pool *pool;
+ struct tls_context *tls_ctx;
+ struct mlx5e_priv *priv;
+ int err;
+
+ tls_ctx = tls_get_ctx(sk);
+ priv = netdev_priv(netdev);
+ pool = priv->tls->tx_pool;
+
+ priv_tx = pool_pop(pool);
+ if (IS_ERR(priv_tx))
+ return PTR_ERR(priv_tx);
+
+ err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id);
+ if (err)
+ goto err_create_key;
+
+ priv_tx->expected_seq = start_offload_tcp_sn;
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ priv_tx->crypto_info.crypto_info_128 =
+ *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ priv_tx->crypto_info.crypto_info_256 =
+ *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->cipher_type);
+ return -EOPNOTSUPP;
+ }
+ priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx);
+
+ mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx);
+
+ priv_tx->ctx_post_pending = true;
+ atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx);
+
+ return 0;
+
+err_create_key:
+ pool_push(pool, priv_tx);
+ return err;
+}
+
+void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_tls_tx_pool *pool;
+ struct mlx5e_priv *priv;
+
+ priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+ priv = netdev_priv(netdev);
+ pool = priv->tls->tx_pool;
+
+ atomic64_inc(&priv_tx->sw_stats->tx_tls_del);
+ mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id);
+ pool_push(pool, priv_tx);
+}
+
+static void tx_fill_wi(struct mlx5e_txqsq *sq,
+ u16 pi, u8 num_wqebbs, u32 num_bytes,
+ struct page *page)
+{
+ struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_bytes = num_bytes,
+ .resync_dump_frag_page = page,
+ };
+}
+
+static bool
+mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ bool ret = priv_tx->ctx_post_pending;
+
+ priv_tx->ctx_post_pending = false;
+
+ return ret;
+}
+
+static void
+post_static_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5e_set_tls_static_params_wqe *wqe;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS;
+ pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_tx->crypto_info,
+ priv_tx->tisn, priv_tx->key_id, 0, fence,
+ TLS_OFFLOAD_CTX_DIR_TX);
+ tx_fill_wi(sq, pi, num_wqebbs, 0, NULL);
+ sq->pc += num_wqebbs;
+}
+
+static void
+post_progress_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5e_set_tls_progress_params_wqe *wqe;
+ u16 pi, num_wqebbs;
+
+ num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS;
+ pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
+ wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi);
+ mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_tx->tisn, fence, 0,
+ TLS_OFFLOAD_CTX_DIR_TX);
+ tx_fill_wi(sq, pi, num_wqebbs, 0, NULL);
+ sq->pc += num_wqebbs;
+}
+
+static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ tx_fill_wi(sq, pi, 1, 0, NULL);
+
+ mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
+}
+
+static void
+mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool skip_static_post, bool fence_first_post)
+{
+ bool progress_fence = skip_static_post || !fence_first_post;
+
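+	/* Exactly one of the posted params WQEs carries a fence: the static
+	 * params WQE when it is posted with fence_first_post set, otherwise
+	 * the progress params WQE.
+	 */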
+ if (!skip_static_post)
+ post_static_params(sq, priv_tx, fence_first_post);
+
+ post_progress_params(sq, priv_tx, progress_fence);
+ tx_post_fence_nop(sq);
+}
+
+struct tx_sync_info {
+ u64 rcd_sn;
+ u32 sync_len;
+ int nr_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+enum mlx5e_ktls_sync_retval {
+ MLX5E_KTLS_SYNC_DONE,
+ MLX5E_KTLS_SYNC_FAIL,
+ MLX5E_KTLS_SYNC_SKIP_NO_DATA,
+};
+
+static enum mlx5e_ktls_sync_retval
+tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ u32 tcp_seq, int datalen, struct tx_sync_info *info)
+{
+ struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx;
+ enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE;
+ struct tls_record_info *record;
+ int remaining, i = 0;
+ unsigned long flags;
+ bool ends_before;
+
+ spin_lock_irqsave(&tx_ctx->lock, flags);
+ record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn);
+
+ if (unlikely(!record)) {
+ ret = MLX5E_KTLS_SYNC_FAIL;
+ goto out;
+ }
+
+ /* There are the following cases:
+ * 1. packet ends before start marker: bypass offload.
+ * 2. packet starts before start marker and ends after it: drop,
+ * not supported, breaks contract with kernel.
+ * 3. packet ends before tls record info starts: drop,
+ * this packet was already acknowledged and its record info
+ * was released.
+ */
+ ends_before = before(tcp_seq + datalen - 1, tls_record_start_seq(record));
+
+ if (unlikely(tls_record_is_start_marker(record))) {
+ ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL;
+ goto out;
+ } else if (ends_before) {
+ ret = MLX5E_KTLS_SYNC_FAIL;
+ goto out;
+ }
+
+ info->sync_len = tcp_seq - tls_record_start_seq(record);
+ remaining = info->sync_len;
+ while (remaining > 0) {
+ skb_frag_t *frag = &record->frags[i];
+
+ get_page(skb_frag_page(frag));
+ remaining -= skb_frag_size(frag);
+ info->frags[i++] = *frag;
+ }
+ /* reduce the part which will be sent with the original SKB */
+ if (remaining < 0)
+ skb_frag_size_add(&info->frags[i - 1], remaining);
+ info->nr_frags = i;
+out:
+ spin_unlock_irqrestore(&tx_ctx->lock, flags);
+ return ret;
+}
+
+static void
+tx_post_resync_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ u64 rcd_sn)
+{
+ __be64 rn_be = cpu_to_be64(rcd_sn);
+ bool skip_static_post;
+ u16 rec_seq_sz;
+ char *rec_seq;
+
+ switch (priv_tx->crypto_info.crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info.crypto_info_128;
+
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info = &priv_tx->crypto_info.crypto_info_256;
+
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ priv_tx->crypto_info.crypto_info.cipher_type);
+ return;
+ }
+
+ skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
+ if (!skip_static_post)
+ memcpy(rec_seq, &rn_be, rec_seq_sz);
+
+ mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true);
+}
+
+static int
+tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_dump_wqe *wqe;
+ dma_addr_t dma_addr = 0;
+ u16 ds_cnt;
+ int fsz;
+ u16 pi;
+
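+	/* Build a single DUMP WQE carrying one DMA-mapped fragment of the
+	 * record being resynced, attached to the connection's TIS.
+	 */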
+ BUILD_BUG_ON(MLX5E_KTLS_DUMP_WQEBBS != 1);
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wqe = MLX5E_TLS_FETCH_DUMP_WQE(sq, pi);
+
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+
+ cseg = &wqe->ctrl;
+ dseg = &wqe->data;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ cseg->tis_tir_num = cpu_to_be32(tisn << 8);
+
+ fsz = skb_frag_size(frag);
+ dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ return -ENOMEM;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(fsz);
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
+
+ tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag));
+ sq->pc += MLX5E_KTLS_DUMP_WQEBBS;
+
+ return 0;
+}
+
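+/* Completion handler for a DUMP WQE: unmap its DMA buffer, drop the page
+ * reference held for the dump fragment, and update the dump counters.
+ */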
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ struct mlx5e_sq_stats *stats;
+ struct mlx5e_sq_dma *dma;
+
+ dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+ stats = sq->stats;
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ put_page(wi->resync_dump_frag_page);
+ stats->tls_dump_packets++;
+ stats->tls_dump_bytes += wi->num_bytes;
+}
+
+static enum mlx5e_ktls_sync_retval
+mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ struct mlx5e_txqsq *sq,
+ int datalen,
+ u32 seq)
+{
+ enum mlx5e_ktls_sync_retval ret;
+ struct tx_sync_info info = {};
+ int i;
+
+ ret = tx_sync_info_get(priv_tx, seq, datalen, &info);
+ if (unlikely(ret != MLX5E_KTLS_SYNC_DONE))
+ /* We might get here with ret == FAIL if a retransmission
+ * reaches the driver after the relevant record is acked.
+ * It should be safe to drop the packet in this case
+ */
+ return ret;
+
+ tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
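+	/* Re-post the record bytes preceding the current packet as DUMP WQEs,
+	 * splitting each fragment so that no single DUMP exceeds the HW MTU.
+	 */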
+ for (i = 0; i < info.nr_frags; i++) {
+ unsigned int orig_fsz, frag_offset = 0, n = 0;
+ skb_frag_t *f = &info.frags[i];
+
+ orig_fsz = skb_frag_size(f);
+
+ do {
+ unsigned int fsz;
+
+ n++;
+ fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset);
+ skb_frag_size_set(f, fsz);
+ if (tx_post_resync_dump(sq, f, priv_tx->tisn)) {
+ page_ref_add(skb_frag_page(f), n - 1);
+ goto err_out;
+ }
+
+ skb_frag_off_add(f, fsz);
+ frag_offset += fsz;
+ } while (frag_offset < orig_fsz);
+
+ page_ref_add(skb_frag_page(f), n - 1);
+ }
+
+ return MLX5E_KTLS_SYNC_DONE;
+
+err_out:
+ for (; i < info.nr_frags; i++)
+ /* The put_page() here undoes the page ref obtained in tx_sync_info_get().
+ * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be
+ * released only upon their completions (or in mlx5e_free_txqsq_descs,
+ * if channel closes).
+ */
+ put_page(skb_frag_page(&info.frags[i]));
+
+ return MLX5E_KTLS_SYNC_FAIL;
+}
+
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_tls_state *state)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct net_device *tls_netdev;
+ struct tls_context *tls_ctx;
+ int datalen;
+ u32 seq;
+
+ datalen = skb->len - skb_tcp_all_headers(skb);
+ if (!datalen)
+ return true;
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ tls_netdev = rcu_dereference_bh(tls_ctx->netdev);
+ /* Don't WARN on NULL: if tls_device_down is running in parallel,
+ * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was
+ * true. Rather continue processing this packet.
+ */
+ if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev))
+ goto err_out;
+
+ priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+
+ if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx)))
+ mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
+
+ seq = ntohl(tcp_hdr(skb)->seq);
+ if (unlikely(priv_tx->expected_seq != seq)) {
+ enum mlx5e_ktls_sync_retval ret =
+ mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq);
+
+ stats->tls_ooo++;
+
+ switch (ret) {
+ case MLX5E_KTLS_SYNC_DONE:
+ break;
+ case MLX5E_KTLS_SYNC_SKIP_NO_DATA:
+ stats->tls_skip_no_sync_data++;
+ if (likely(!skb->decrypted))
+ goto out;
+ WARN_ON_ONCE(1);
+ goto err_out;
+ case MLX5E_KTLS_SYNC_FAIL:
+ stats->tls_drop_no_sync_data++;
+ goto err_out;
+ }
+ }
+
+ priv_tx->expected_seq = seq + datalen;
+
+ state->tls_tisn = priv_tx->tisn;
+
+ stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+ stats->tls_encrypted_bytes += datalen;
+
+out:
+ return true;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return false;
+}
+
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_tx(priv->mdev))
+ return 0;
+
+ priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats);
+ if (!priv->tls->tx_pool)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+{
+ if (!mlx5e_is_ktls_tx(priv->mdev))
+ return;
+
+ mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool);
+ priv->tls->tx_pool = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c
new file mode 100644
index 000000000..570a912dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "en_accel/ktls_txrx.h"
+#include "en_accel/ktls_utils.h"
+
+enum {
+ MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2,
+};
+
+enum {
+ MLX5E_ENCRYPTION_STANDARD_TLS = 0x1,
+};
+
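+/* Pull the salt and record sequence out of the cipher-specific crypto_info
+ * selected in the switch in fill_static_params().
+ */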
+#define EXTRACT_INFO_FIELDS do { \
+ salt = info->salt; \
+ rec_seq = info->rec_seq; \
+ salt_sz = sizeof(info->salt); \
+ rec_seq_sz = sizeof(info->rec_seq); \
+} while (0)
+
+static void
+fill_static_params(struct mlx5_wqe_tls_static_params_seg *params,
+ union mlx5e_crypto_info *crypto_info,
+ u32 key_id, u32 resync_tcp_sn)
+{
+ char *initial_rn, *gcm_iv;
+ u16 salt_sz, rec_seq_sz;
+ char *salt, *rec_seq;
+ u8 tls_version;
+ u8 *ctx;
+
+ ctx = params->ctx;
+
+ switch (crypto_info->crypto_info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ &crypto_info->crypto_info_128;
+
+ EXTRACT_INFO_FIELDS;
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ &crypto_info->crypto_info_256;
+
+ EXTRACT_INFO_FIELDS;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Unsupported cipher type %u\n",
+ crypto_info->crypto_info.cipher_type);
+ return;
+ }
+
+ gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
+ initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number);
+
+ memcpy(gcm_iv, salt, salt_sz);
+ memcpy(initial_rn, rec_seq, rec_seq_sz);
+
+ tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2;
+
+ MLX5_SET(tls_static_params, ctx, tls_version, tls_version);
+ MLX5_SET(tls_static_params, ctx, const_1, 1);
+ MLX5_SET(tls_static_params, ctx, const_2, 2);
+ MLX5_SET(tls_static_params, ctx, encryption_standard,
+ MLX5E_ENCRYPTION_STANDARD_TLS);
+ MLX5_SET(tls_static_params, ctx, resync_tcp_sn, resync_tcp_sn);
+ MLX5_SET(tls_static_params, ctx, dek_index, key_id);
+}
+
+void
+mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ union mlx5e_crypto_info *crypto_info,
+ u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn,
+ bool fence, enum tls_offload_ctx_dir direction)
+{
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ?
+ MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS :
+ MLX5_OPC_MOD_TLS_TIR_STATIC_PARAMS;
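+	/* The same WQE format serves both directions: opmod selects TIS
+	 * static params for TX and TIR static params for RX.
+	 */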
+
+#define STATIC_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | (opmod << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ STATIC_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+ cseg->tis_tir_num = cpu_to_be32(tis_tir_num << 8);
+
+ ucseg->flags = MLX5_UMR_INLINE;
+ ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16);
+
+ fill_static_params(&wqe->params, crypto_info, key_id, resync_tcp_sn);
+}
+
+static void
+fill_progress_params(struct mlx5_wqe_tls_progress_params_seg *params, u32 tis_tir_num,
+ u32 next_record_tcp_sn)
+{
+ u8 *ctx = params->ctx;
+
+ params->tis_tir_num = cpu_to_be32(tis_tir_num);
+
+ MLX5_SET(tls_progress_params, ctx, next_record_tcp_sn,
+ next_record_tcp_sn);
+ MLX5_SET(tls_progress_params, ctx, record_tracker_state,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);
+ MLX5_SET(tls_progress_params, ctx, auth_state,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD);
+}
+
+void
+mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ u32 tis_tir_num, bool fence,
+ u32 next_record_tcp_sn,
+ enum tls_offload_ctx_dir direction)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ?
+ MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS :
+ MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS;
+
+#define PROGRESS_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | (opmod << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ PROGRESS_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+ fill_progress_params(&wqe->params, tis_tir_num, next_record_tcp_sn);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
new file mode 100644
index 000000000..2dd78dd4a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_KTLS_TXRX_H__
+#define __MLX5E_KTLS_TXRX_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <net/tls.h>
+#include "en.h"
+#include "en/txrx.h"
+
+struct mlx5e_accel_tx_tls_state {
+ u32 tls_tisn;
+};
+
+u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_tls_state *state);
+void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, u32 *cqe_bcnt);
+
+void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi);
+void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
+ struct mlx5e_icosq *sq);
+
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc);
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ if (unlikely(wi->resync_dump_frag_page)) {
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
+ return true;
+ }
+ return false;
+}
+
+bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget);
+
+static inline bool
+mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
+{
+ return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state);
+}
+
+static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb)
+{
+ return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
+}
+
+static inline void
+mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg,
+ struct mlx5e_accel_tx_tls_state *state)
+{
+ cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
+}
+#else
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ return false;
+}
+
+static inline bool
+mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
+{
+ return false;
+}
+
+static inline bool
+mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
+{
+ return false;
+}
+
+static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return 0;
+}
+
+static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe,
+ u32 *cqe_bcnt)
+{
+}
+#endif /* CONFIG_MLX5_EN_TLS */
+
+#endif /* __MLX5E_KTLS_TXRX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
new file mode 100644
index 000000000..3d79cd379
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_KTLS_UTILS_H__
+#define __MLX5E_KTLS_UTILS_H__
+
+#include <net/tls.h>
+#include "en.h"
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2,
+};
+
+int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn);
+void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx);
+int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+ struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn);
+void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx);
+void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, u32 seq, u8 *rcd_sn);
+
+union mlx5e_crypto_info {
+ struct tls_crypto_info crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 crypto_info_128;
+ struct tls12_crypto_info_aes_gcm_256 crypto_info_256;
+};
+
+struct mlx5e_set_tls_static_params_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_umr_ctrl_seg uctrl;
+ struct mlx5_mkey_seg mkc;
+ struct mlx5_wqe_tls_static_params_seg params;
+};
+
+struct mlx5e_set_tls_progress_params_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_tls_progress_params_seg params;
+};
+
+struct mlx5e_get_tls_progress_params_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_seg_get_psv psv;
+};
+
+#define MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_static_params_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_progress_params_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KTLS_GET_PROGRESS_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_get_tls_progress_params_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi) \
+ ((struct mlx5e_set_tls_static_params_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_static_params_wqe)))
+
+#define MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi) \
+ ((struct mlx5e_set_tls_progress_params_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_progress_params_wqe)))
+
+#define MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi) \
+ ((struct mlx5e_get_tls_progress_params_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_get_tls_progress_params_wqe)))
+
+#define MLX5E_TLS_FETCH_DUMP_WQE(sq, pi) \
+ ((struct mlx5e_dump_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_dump_wqe)))
+
+void
+mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ union mlx5e_crypto_info *crypto_info,
+ u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn,
+ bool fence, enum tls_offload_ctx_dir direction);
+void
+mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe,
+ u16 pc, u32 sqn,
+ u32 tis_tir_num, bool fence,
+ u32 next_record_tcp_sn,
+ enum tls_offload_ctx_dir direction);
+
+#endif /* __MLX5E_KTLS_UTILS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
new file mode 100644
index 000000000..a7832a018
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -0,0 +1,1866 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/xarray.h>
+
+#include "en.h"
+#include "lib/aso.h"
+#include "lib/mlx5.h"
+#include "en_accel/macsec.h"
+#include "en_accel/macsec_fs.h"
+
+#define MLX5_MACSEC_EPN_SCOPE_MID 0x80000000L
+#define MLX5E_MACSEC_ASO_CTX_SZ MLX5_ST_SZ_BYTES(macsec_aso)
+
+enum mlx5_macsec_aso_event_arm {
+ MLX5E_ASO_EPN_ARM = BIT(0),
+};
+
+enum {
+ MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET,
+};
+
+struct mlx5e_macsec_handle {
+ struct mlx5e_macsec *macsec;
+ u32 obj_id;
+ u8 idx;
+};
+
+enum {
+ MLX5_MACSEC_EPN,
+};
+
+struct mlx5e_macsec_aso_out {
+ u8 event_arm;
+ u32 mode_param;
+};
+
+struct mlx5e_macsec_aso_in {
+ u8 mode;
+ u32 obj_id;
+};
+
+struct mlx5e_macsec_epn_state {
+ u32 epn_msb;
+ u8 epn_enabled;
+ u8 overlap;
+};
+
+struct mlx5e_macsec_async_work {
+ struct mlx5e_macsec *macsec;
+ struct mlx5_core_dev *mdev;
+ struct work_struct work;
+ u32 obj_id;
+};
+
+struct mlx5e_macsec_sa {
+ bool active;
+ u8 assoc_num;
+ u32 macsec_obj_id;
+ u32 enc_key_id;
+ u32 next_pn;
+ sci_t sci;
+ ssci_t ssci;
+ salt_t salt;
+
+ struct rhash_head hash;
+ u32 fs_id;
+ union mlx5e_macsec_rule *macsec_rule;
+ struct rcu_head rcu_head;
+ struct mlx5e_macsec_epn_state epn_state;
+};
+
+struct mlx5e_macsec_rx_sc;
+struct mlx5e_macsec_rx_sc_xarray_element {
+ u32 fs_id;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+};
+
+struct mlx5e_macsec_rx_sc {
+ bool active;
+ sci_t sci;
+ struct mlx5e_macsec_sa *rx_sa[MACSEC_NUM_AN];
+ struct list_head rx_sc_list_element;
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ struct metadata_dst *md_dst;
+ struct rcu_head rcu_head;
+};
+
+struct mlx5e_macsec_umr {
+ u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(macsec_aso)];
+ dma_addr_t dma_addr;
+ u32 mkey;
+};
+
+struct mlx5e_macsec_aso {
+ /* ASO */
+ struct mlx5_aso *maso;
+ /* Protects macsec ASO */
+ struct mutex aso_lock;
+ /* UMR */
+ struct mlx5e_macsec_umr *umr;
+
+ u32 pdn;
+};
+
+static const struct rhashtable_params rhash_sci = {
+ .key_len = sizeof_field(struct mlx5e_macsec_sa, sci),
+ .key_offset = offsetof(struct mlx5e_macsec_sa, sci),
+ .head_offset = offsetof(struct mlx5e_macsec_sa, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+struct mlx5e_macsec_device {
+ const struct net_device *netdev;
+ struct mlx5e_macsec_sa *tx_sa[MACSEC_NUM_AN];
+ struct list_head macsec_rx_sc_list_head;
+ unsigned char *dev_addr;
+ struct list_head macsec_device_list_element;
+};
+
+struct mlx5e_macsec {
+ struct list_head macsec_device_list_head;
+ int num_of_devices;
+ struct mlx5e_macsec_fs *macsec_fs;
+ struct mutex lock; /* Protects mlx5e_macsec internal contexts */
+
+ /* Tx sci -> fs id mapping handling */
+ struct rhashtable sci_hash; /* sci -> mlx5e_macsec_sa */
+
+ /* Rx fs_id -> rx_sc mapping */
+ struct xarray sc_xarray;
+
+ struct mlx5_core_dev *mdev;
+
+	/* Stats management */
+ struct mlx5e_macsec_stats stats;
+
+ /* ASO */
+ struct mlx5e_macsec_aso aso;
+
+ struct notifier_block nb;
+ struct workqueue_struct *wq;
+};
+
+struct mlx5_macsec_obj_attrs {
+ u32 aso_pdn;
+ u32 next_pn;
+ __be64 sci;
+ u32 enc_key_id;
+ bool encrypt;
+ struct mlx5e_macsec_epn_state epn_state;
+ salt_t salt;
+ __be32 ssci;
+ bool replay_protect;
+ u32 replay_window;
+};
+
+struct mlx5_aso_ctrl_param {
+ u8 data_mask_mode;
+ u8 condition_0_operand;
+ u8 condition_1_operand;
+ u8 condition_0_offset;
+ u8 condition_1_offset;
+ u8 data_offset;
+ u8 condition_operand;
+ u32 condition_0_data;
+ u32 condition_0_mask;
+ u32 condition_1_data;
+ u32 condition_1_mask;
+ u64 bitwise_data;
+ u64 data_mask;
+};
+
+static int mlx5e_macsec_aso_reg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso)
+{
+ struct mlx5e_macsec_umr *umr;
+ struct device *dma_device;
+ dma_addr_t dma_addr;
+ int err;
+
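+	/* Allocate a scratch buffer for the MACsec ASO context, DMA-map it
+	 * and create an MKey over it.
+	 */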
+ umr = kzalloc(sizeof(*umr), GFP_KERNEL);
+ if (!umr) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ dma_device = &mdev->pdev->dev;
+ dma_addr = dma_map_single(dma_device, umr->ctx, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+ err = dma_mapping_error(dma_device, dma_addr);
+ if (err) {
+ mlx5_core_err(mdev, "Can't map dma device, err=%d\n", err);
+ goto out_dma;
+ }
+
+ err = mlx5e_create_mkey(mdev, aso->pdn, &umr->mkey);
+ if (err) {
+ mlx5_core_err(mdev, "Can't create mkey, err=%d\n", err);
+ goto out_mkey;
+ }
+
+ umr->dma_addr = dma_addr;
+
+ aso->umr = umr;
+
+ return 0;
+
+out_mkey:
+ dma_unmap_single(dma_device, dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+out_dma:
+ kfree(umr);
+ return err;
+}
+
+static void mlx5e_macsec_aso_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso)
+{
+ struct mlx5e_macsec_umr *umr = aso->umr;
+
+ mlx5_core_destroy_mkey(mdev, umr->mkey);
+ dma_unmap_single(&mdev->pdev->dev, umr->dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL);
+ kfree(umr);
+}
+
+static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, void *aso_ctx)
+{
+ u8 window_sz;
+
+ if (!attrs->replay_protect)
+ return 0;
+
+ switch (attrs->replay_window) {
+ case 256:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT;
+ break;
+ case 128:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT;
+ break;
+ case 64:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT;
+ break;
+ case 32:
+ window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz);
+ MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION);
+
+ return 0;
+}
+
+static int mlx5e_macsec_create_object(struct mlx5_core_dev *mdev,
+ struct mlx5_macsec_obj_attrs *attrs,
+ bool is_tx,
+ u32 *macsec_obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_macsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ void *aso_ctx;
+ void *obj;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_macsec_obj_in, in, macsec_object);
+ aso_ctx = MLX5_ADDR_OF(macsec_offload_obj, obj, macsec_aso);
+
+ MLX5_SET(macsec_offload_obj, obj, confidentiality_en, attrs->encrypt);
+ MLX5_SET(macsec_offload_obj, obj, dekn, attrs->enc_key_id);
+ MLX5_SET(macsec_offload_obj, obj, aso_return_reg, MLX5_MACSEC_ASO_REG_C_4_5);
+ MLX5_SET(macsec_offload_obj, obj, macsec_aso_access_pd, attrs->aso_pdn);
+ MLX5_SET(macsec_aso, aso_ctx, mode_parameter, attrs->next_pn);
+
+ /* Epn */
+ if (attrs->epn_state.epn_enabled) {
+ void *salt_p;
+ int i;
+
+ MLX5_SET(macsec_aso, aso_ctx, epn_event_arm, 1);
+ MLX5_SET(macsec_offload_obj, obj, epn_en, 1);
+ MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb);
+ MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap);
+ MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)attrs->ssci);
+ salt_p = MLX5_ADDR_OF(macsec_offload_obj, obj, salt);
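+		/* The 12-byte salt is written in reversed 32-bit word order. */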
+		for (i = 0; i < 3; i++)
+ memcpy((u32 *)salt_p + i, &attrs->salt.bytes[4 * (2 - i)], 4);
+ } else {
+ MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)(attrs->sci));
+ }
+
+ MLX5_SET(macsec_aso, aso_ctx, valid, 0x1);
+ if (is_tx) {
+ MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_INC_SN);
+ } else {
+ err = macsec_set_replay_protection(attrs, aso_ctx);
+ if (err)
+ return err;
+ }
+
+	/* Set the general object command header fields */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev,
+ "MACsec offload: Failed to create MACsec object (err = %d)\n",
+ err);
+ return err;
+ }
+
+ *macsec_obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_sa *sa,
+ bool is_tx)
+{
+ int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
+
+ if ((is_tx) && sa->fs_id) {
+		/* Make sure ongoing datapath readers see a valid SA */
+ rhashtable_remove_fast(&macsec->sci_hash, &sa->hash, rhash_sci);
+ sa->fs_id = 0;
+ }
+
+ if (!sa->macsec_rule)
+ return;
+
+ mlx5e_macsec_fs_del_rule(macsec->macsec_fs, sa->macsec_rule, action);
+ mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id);
+ sa->macsec_rule = NULL;
+}
+
+static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
+ struct mlx5e_macsec_sa *sa,
+ bool encrypt,
+ bool is_tx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5_macsec_rule_attrs rule_attrs;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_macsec_obj_attrs obj_attrs;
+ union mlx5e_macsec_rule *macsec_rule;
+ int err;
+
+ obj_attrs.next_pn = sa->next_pn;
+ obj_attrs.sci = cpu_to_be64((__force u64)sa->sci);
+ obj_attrs.enc_key_id = sa->enc_key_id;
+ obj_attrs.encrypt = encrypt;
+ obj_attrs.aso_pdn = macsec->aso.pdn;
+ obj_attrs.epn_state = sa->epn_state;
+
+ if (sa->epn_state.epn_enabled) {
+ obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci);
+ memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt));
+ }
+
+ obj_attrs.replay_window = ctx->secy->replay_window;
+ obj_attrs.replay_protect = ctx->secy->replay_protect;
+
+ err = mlx5e_macsec_create_object(mdev, &obj_attrs, is_tx, &sa->macsec_obj_id);
+ if (err)
+ return err;
+
+ rule_attrs.macsec_obj_id = sa->macsec_obj_id;
+ rule_attrs.sci = sa->sci;
+ rule_attrs.assoc_num = sa->assoc_num;
+ rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT :
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT;
+
+ macsec_rule = mlx5e_macsec_fs_add_rule(macsec->macsec_fs, ctx, &rule_attrs, &sa->fs_id);
+ if (!macsec_rule) {
+ err = -ENOMEM;
+ goto destroy_macsec_object;
+ }
+
+ sa->macsec_rule = macsec_rule;
+
+ if (is_tx) {
+ err = rhashtable_insert_fast(&macsec->sci_hash, &sa->hash, rhash_sci);
+ if (err)
+ goto destroy_macsec_object_and_rule;
+ }
+
+ return 0;
+
+destroy_macsec_object_and_rule:
+ mlx5e_macsec_cleanup_sa(macsec, sa, is_tx);
+destroy_macsec_object:
+ mlx5e_macsec_destroy_object(mdev, sa->macsec_obj_id);
+
+ return err;
+}
+
+static struct mlx5e_macsec_rx_sc *
+mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci)
+{
+ struct mlx5e_macsec_rx_sc *iter;
+
+ list_for_each_entry_rcu(iter, list, rx_sc_list_element) {
+ if (iter->sci == sci)
+ return iter;
+ }
+
+ return NULL;
+}
+
+static int macsec_rx_sa_active_update(struct macsec_context *ctx,
+ struct mlx5e_macsec_sa *rx_sa,
+ bool active)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec *macsec = priv->macsec;
+ int err = 0;
+
+ if (rx_sa->active == active)
+ return 0;
+
+ rx_sa->active = active;
+ if (!active) {
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ return 0;
+ }
+
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
+ if (err)
+ rx_sa->active = false;
+
+ return err;
+}
+
+static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx)
+{
+ const struct net_device *netdev = ctx->netdev;
+ const struct macsec_secy *secy = ctx->secy;
+
+ if (secy->validate_frames != MACSEC_VALIDATE_STRICT) {
+ netdev_err(netdev,
+			   "MACsec offload is supported only when validate_frames is in strict mode\n");
+ return false;
+ }
+
+ if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) {
+ netdev_err(netdev, "MACsec offload is supported only when icv_len is %d\n",
+ MACSEC_DEFAULT_ICV_LEN);
+ return false;
+ }
+
+ if (!secy->protect_frames) {
+ netdev_err(netdev,
+ "MACsec offload is supported only when protect_frames is set\n");
+ return false;
+ }
+
+ if (!ctx->secy->tx_sc.encrypt) {
+ netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n");
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5e_macsec_device *
+mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec,
+ const struct macsec_context *ctx)
+{
+ struct mlx5e_macsec_device *iter;
+ const struct list_head *list;
+
+ list = &macsec->macsec_device_list_head;
+ list_for_each_entry_rcu(iter, list, macsec_device_list_element) {
+ if (iter->netdev == ctx->secy->netdev)
+ return iter;
+ }
+
+ return NULL;
+}
+
+static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key,
+ const pn_t *next_pn_halves, ssci_t ssci)
+{
+ struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state;
+
+ sa->ssci = ssci;
+ sa->salt = key->salt;
+ epn_state->epn_enabled = 1;
+ epn_state->epn_msb = next_pn_halves->upper;
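+	/* overlap indicates that the lower half of the PN has already crossed
+	 * the mid-point of its range.
+	 */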
+ epn_state->overlap = next_pn_halves->lower < MLX5_MACSEC_EPN_SCOPE_MID ? 0 : 1;
+}
+
+static int mlx5e_macsec_add_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_secy *secy = ctx->secy;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (macsec_device->tx_sa[assoc_num]) {
+		netdev_err(ctx->netdev, "MACsec offload tx_sa: %d already exists\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ tx_sa = kzalloc(sizeof(*tx_sa), GFP_KERNEL);
+ if (!tx_sa) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tx_sa->active = ctx_tx_sa->active;
+ tx_sa->next_pn = ctx_tx_sa->next_pn_halves.lower;
+ tx_sa->sci = secy->sci;
+ tx_sa->assoc_num = assoc_num;
+
+ if (secy->xpn)
+ update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves,
+ ctx_tx_sa->ssci);
+
+ err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len,
+ MLX5_ACCEL_OBJ_MACSEC_KEY,
+ &tx_sa->enc_key_id);
+ if (err)
+ goto destroy_sa;
+
+ macsec_device->tx_sa[assoc_num] = tx_sa;
+ if (!secy->operational ||
+ assoc_num != tx_sc->encoding_sa ||
+ !tx_sa->active)
+ goto out;
+
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto destroy_encryption_key;
+
+ mutex_unlock(&macsec->lock);
+
+ return 0;
+
+destroy_encryption_key:
+ macsec_device->tx_sa[assoc_num] = NULL;
+ mlx5_destroy_encryption_key(mdev, tx_sa->enc_key_id);
+destroy_sa:
+ kfree(tx_sa);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ struct net_device *netdev;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ netdev = ctx->netdev;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ tx_sa = macsec_device->tx_sa[assoc_num];
+ if (!tx_sa) {
+ netdev_err(netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (ctx->sa.update_pn) {
+ netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n",
+ assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (tx_sa->active == ctx_tx_sa->active)
+ goto out;
+
+ tx_sa->active = ctx_tx_sa->active;
+ if (tx_sa->assoc_num != tx_sc->encoding_sa)
+ goto out;
+
+ if (ctx_tx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto out;
+ } else {
+ if (!tx_sa->macsec_rule) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ }
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_txsa(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ tx_sa = macsec_device->tx_sa[assoc_num];
+ if (!tx_sa) {
+ netdev_err(ctx->netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id);
+ kfree_rcu(tx_sa);
+ macsec_device->tx_sa[assoc_num] = NULL;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t *sci)
+{
+ struct mlx5e_macsec_sa *macsec_sa;
+ u32 fs_id = 0;
+
+ rcu_read_lock();
+ macsec_sa = rhashtable_lookup(sci_hash, sci, rhash_sci);
+ if (macsec_sa)
+ fs_id = macsec_sa->fs_id;
+ rcu_read_unlock();
+
+ return fs_id;
+}
+
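+/* Rx SC add flow, in outline: allocate the driver-side SC state, reserve an
+ * fs_id from the sc_xarray (this id is later carried in the Rx steering
+ * metadata so the datapath can map a CQE back to its SC), and allocate a
+ * MACsec metadata_dst used to convey the SCI to the stack on received skbs.
+ * The SC is then published on the per-device RCU list.
+ */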
+static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct list_head *rx_sc_list;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sc_list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(rx_sc_list, ctx_rx_sc->sci);
+ if (rx_sc) {
+ netdev_err(ctx->netdev, "MACsec offload: rx_sc (sci %lld) already exists\n",
+ ctx_rx_sc->sci);
+ err = -EEXIST;
+ goto out;
+ }
+
+ rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL);
+ if (!rx_sc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sc_xarray_element = kzalloc(sizeof(*sc_xarray_element), GFP_KERNEL);
+ if (!sc_xarray_element) {
+ err = -ENOMEM;
+ goto destroy_rx_sc;
+ }
+
+ sc_xarray_element->rx_sc = rx_sc;
+ err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element,
+ XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL);
+ if (err) {
+ if (err == -EBUSY)
+ netdev_err(ctx->netdev,
+ "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n",
+ MLX5_MACEC_RX_FS_ID_MAX);
+ goto destroy_sc_xarray_element;
+ }
+
+ rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL);
+ if (!rx_sc->md_dst) {
+ err = -ENOMEM;
+ goto erase_xa_alloc;
+ }
+
+ rx_sc->sci = ctx_rx_sc->sci;
+ rx_sc->active = ctx_rx_sc->active;
+ list_add_rcu(&rx_sc->rx_sc_list_element, rx_sc_list);
+
+ rx_sc->sc_xarray_element = sc_xarray_element;
+ rx_sc->md_dst->u.macsec_info.sci = rx_sc->sci;
+ mutex_unlock(&macsec->lock);
+
+ return 0;
+
+erase_xa_alloc:
+ xa_erase(&macsec->sc_xarray, sc_xarray_element->fs_id);
+destroy_sc_xarray_element:
+ kfree(sc_xarray_element);
+destroy_rx_sc:
+ kfree(rx_sc);
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int i;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx_rx_sc->sci);
+ if (!rx_sc) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rx_sc->active == ctx_rx_sc->active)
+ goto out;
+
+ rx_sc->active = ctx_rx_sc->active;
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active);
+ if (err)
+ goto out;
+ }
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
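+/* Tear down an Rx SC: release every Rx SA (steering rule, MACsec object and
+ * encryption key object) before unlinking and freeing the SC itself.
+ */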
+static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc)
+{
+ struct mlx5e_macsec_sa *rx_sa;
+ int i;
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+
+ kfree(rx_sa);
+ rx_sc->rx_sa[i] = NULL;
+ }
+
+ /* At this point the relevant MACsec offload Rx rules have already been
+ * removed by mlx5e_macsec_cleanup_sa. The datapath must stop seeing this
+ * rx_sc before it is freed: erasing the fs_id with xa_erase(), which
+ * synchronizes via RCU, hides the rx_sc from the datapath, and the final
+ * free is deferred with kfree_rcu().
+ */
+ list_del_rcu(&rx_sc->rx_sc_list_element);
+ xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id);
+ metadata_dst_free(rx_sc->md_dst);
+ kfree(rx_sc->sc_xarray_element);
+ kfree_rcu(rx_sc);
+}
+
+static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx->rx_sc->sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->rx_sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ macsec_del_rxsc_ctx(macsec, rx_sc);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ sci_t sci = ctx_rx_sa->sc->sci;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rx_sc->rx_sa[assoc_num]) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d already exist\n",
+ sci, assoc_num);
+ err = -EEXIST;
+ goto out;
+ }
+
+ rx_sa = kzalloc(sizeof(*rx_sa), GFP_KERNEL);
+ if (!rx_sa) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rx_sa->active = ctx_rx_sa->active;
+ rx_sa->next_pn = ctx_rx_sa->next_pn;
+ rx_sa->sci = sci;
+ rx_sa->assoc_num = assoc_num;
+ rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id;
+
+ if (ctx->secy->xpn)
+ update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves,
+ ctx_rx_sa->ssci);
+
+ err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len,
+ MLX5_ACCEL_OBJ_MACSEC_KEY,
+ &rx_sa->enc_key_id);
+ if (err)
+ goto destroy_sa;
+
+ rx_sc->rx_sa[assoc_num] = rx_sa;
+ if (!rx_sa->active)
+ goto out;
+
+ /* TODO: add support for both authentication and encryption flows */
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
+ if (err)
+ goto destroy_encryption_key;
+
+ goto out;
+
+destroy_encryption_key:
+ rx_sc->rx_sa[assoc_num] = NULL;
+ mlx5_destroy_encryption_key(mdev, rx_sa->enc_key_id);
+destroy_sa:
+ kfree(rx_sa);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx)
+{
+ const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ sci_t sci = ctx_rx_sa->sc->sci;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sa = rx_sc->rx_sa[assoc_num];
+ if (!rx_sa) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n",
+ sci, assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (ctx->sa.update_pn) {
+ netdev_err(ctx->netdev,
+ "MACsec offload update RX sa %d PN isn't supported\n",
+ assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active);
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ sci_t sci = ctx->sa.rx_sa->sc->sci;
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ u8 assoc_num = ctx->sa.assoc_num;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci);
+ if (!rx_sc) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld doesn't exist\n",
+ ctx->sa.rx_sa->sc->sci);
+ err = -EINVAL;
+ goto out;
+ }
+
+ rx_sa = rx_sc->rx_sa[assoc_num];
+ if (!rx_sa) {
+ netdev_err(ctx->netdev,
+ "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n",
+ sci, assoc_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+ kfree(rx_sa);
+ rx_sc->rx_sa[assoc_num] = NULL;
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_add_secy(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ const struct net_device *netdev = ctx->netdev;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec *macsec;
+ int err = 0;
+
+ if (!mlx5e_macsec_secy_features_validate(ctx))
+ return -EINVAL;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ if (mlx5e_macsec_get_macsec_device_context(macsec, ctx)) {
+ netdev_err(netdev, "MACsec offload: MACsec net_device already exist\n");
+ goto out;
+ }
+
+ if (macsec->num_of_devices >= MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES) {
+ netdev_err(netdev, "Currently, only %d MACsec offload devices can be set\n",
+ MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES);
+ err = -EBUSY;
+ goto out;
+ }
+
+ macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL);
+ if (!macsec_device) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ macsec_device->dev_addr = kmemdup(dev->dev_addr, dev->addr_len, GFP_KERNEL);
+ if (!macsec_device->dev_addr) {
+ kfree(macsec_device);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ macsec_device->netdev = dev;
+
+ INIT_LIST_HEAD_RCU(&macsec_device->macsec_rx_sc_list_head);
+ list_add_rcu(&macsec_device->macsec_device_list_element, &macsec->macsec_device_list_head);
+
+ ++macsec->num_of_devices;
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
+ struct mlx5e_macsec_device *macsec_device)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
+ struct mlx5e_macsec_sa *rx_sa;
+ struct list_head *list;
+ int i, err = 0;
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa || !rx_sa->macsec_rule)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ }
+ }
+
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ if (rx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
+ if (err)
+ goto out;
+ }
+ }
+ }
+
+ memcpy(macsec_device->dev_addr, dev->dev_addr, dev->addr_len);
+out:
+ return err;
+}
+
+/* This function is called from two macsec ops functions:
+ * macsec_set_mac_address - the MAC address was changed, therefore the Tx
+ * contexts (macsec object + steering) need to be destroyed and re-created.
+ * macsec_changelink - the Tx SC or SecY may have changed, therefore the Tx
+ * and Rx contexts (macsec object + steering) need to be destroyed and re-created.
+ */
+static int mlx5e_macsec_upd_secy(struct macsec_context *ctx)
+{
+ const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc;
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ const struct net_device *dev = ctx->secy->netdev;
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ int i, err = 0;
+
+ if (!mlx5e_macsec_secy_features_validate(ctx))
+ return -EINVAL;
+
+ mutex_lock(&priv->macsec->lock);
+
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* If the dev_addr hasn't changed, it means the callback came from macsec_changelink */
+ if (!memcmp(macsec_device->dev_addr, dev->dev_addr, dev->addr_len)) {
+ err = macsec_upd_secy_hw_address(ctx, macsec_device);
+ if (err)
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) {
+ err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static int mlx5e_macsec_del_secy(struct macsec_context *ctx)
+{
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec_device *macsec_device;
+ struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
+ struct mlx5e_macsec_sa *tx_sa;
+ struct mlx5e_macsec *macsec;
+ struct list_head *list;
+ int err = 0;
+ int i;
+
+ mutex_lock(&priv->macsec->lock);
+ macsec = priv->macsec;
+ macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx);
+ if (!macsec_device) {
+ netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ tx_sa = macsec_device->tx_sa[i];
+ if (!tx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
+ mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id);
+ kfree(tx_sa);
+ macsec_device->tx_sa[i] = NULL;
+ }
+
+ list = &macsec_device->macsec_rx_sc_list_head;
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element)
+ macsec_del_rxsc_ctx(macsec, rx_sc);
+
+ kfree(macsec_device->dev_addr);
+ macsec_device->dev_addr = NULL;
+
+ list_del_rcu(&macsec_device->macsec_device_list_element);
+ --macsec->num_of_devices;
+ kfree(macsec_device);
+
+out:
+ mutex_unlock(&macsec->lock);
+
+ return err;
+}
+
+static void macsec_build_accel_attrs(struct mlx5e_macsec_sa *sa,
+ struct mlx5_macsec_obj_attrs *attrs)
+{
+ attrs->epn_state.epn_msb = sa->epn_state.epn_msb;
+ attrs->epn_state.overlap = sa->epn_state.overlap;
+}
+
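+/* Build the ASO control segment of a MACsec ASO WQE. When a UMR buffer is
+ * registered, its DMA address and mkey are programmed with the READ enable
+ * bit so the MACsec ASO context can be read back into the UMR buffer (used
+ * by macsec_aso_query()). The optional param block programs the condition
+ * operands and the bitwise data/mask used to modify the ASO context.
+ */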
+static void macsec_aso_build_wqe_ctrl_seg(struct mlx5e_macsec_aso *macsec_aso,
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl,
+ struct mlx5_aso_ctrl_param *param)
+{
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ if (macsec_aso->umr->dma_addr) {
+ aso_ctrl->va_l = cpu_to_be32(macsec_aso->umr->dma_addr | ASO_CTRL_READ_EN);
+ aso_ctrl->va_h = cpu_to_be32((u64)macsec_aso->umr->dma_addr >> 32);
+ aso_ctrl->l_key = cpu_to_be32(macsec_aso->umr->mkey);
+ }
+
+ if (!param)
+ return;
+
+ aso_ctrl->data_mask_mode = param->data_mask_mode << 6;
+ aso_ctrl->condition_1_0_operand = param->condition_1_operand |
+ param->condition_0_operand << 4;
+ aso_ctrl->condition_1_0_offset = param->condition_1_offset |
+ param->condition_0_offset << 4;
+ aso_ctrl->data_offset_condition_operand = param->data_offset |
+ param->condition_operand << 6;
+ aso_ctrl->condition_0_data = cpu_to_be32(param->condition_0_data);
+ aso_ctrl->condition_0_mask = cpu_to_be32(param->condition_0_mask);
+ aso_ctrl->condition_1_data = cpu_to_be32(param->condition_1_data);
+ aso_ctrl->condition_1_mask = cpu_to_be32(param->condition_1_mask);
+ aso_ctrl->bitwise_data = cpu_to_be64(param->bitwise_data);
+ aso_ctrl->data_mask = cpu_to_be64(param->data_mask);
+}
+
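+/* Modify the EPN state of an existing MACsec offload object. The object is
+ * queried first so modify_field_select can be checked; only if both the
+ * epn_overlap and epn_msb fields are reported as modifiable is a
+ * MODIFY_GENERAL_OBJECT command issued with the new values.
+ */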
+static int mlx5e_macsec_modify_obj(struct mlx5_core_dev *mdev, struct mlx5_macsec_obj_attrs *attrs,
+ u32 macsec_id)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_macsec_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(query_macsec_obj_out)];
+ u64 modify_field_select = 0;
+ void *obj;
+ int err;
+
+ /* General object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_id);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err) {
+ mlx5_core_err(mdev, "Query MACsec object failed (Object id %d), err = %d\n",
+ macsec_id, err);
+ return err;
+ }
+
+ obj = MLX5_ADDR_OF(query_macsec_obj_out, out, macsec_object);
+ modify_field_select = MLX5_GET64(macsec_offload_obj, obj, modify_field_select);
+
+ /* EPN */
+ if (!(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP) ||
+ !(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB)) {
+ mlx5_core_dbg(mdev, "MACsec object field is not modifiable (Object id %d)\n",
+ macsec_id);
+ return -EOPNOTSUPP;
+ }
+
+ obj = MLX5_ADDR_OF(modify_macsec_obj_in, in, macsec_object);
+ MLX5_SET64(macsec_offload_obj, obj, modify_field_select,
+ MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP | MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB);
+ MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb);
+ MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap);
+
+ /* General object fields set */
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void macsec_aso_build_ctrl(struct mlx5e_macsec_aso *aso,
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl,
+ struct mlx5e_macsec_aso_in *in)
+{
+ struct mlx5_aso_ctrl_param param = {};
+
+ param.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT;
+ param.condition_0_operand = MLX5_ASO_ALWAYS_TRUE;
+ param.condition_1_operand = MLX5_ASO_ALWAYS_TRUE;
+ if (in->mode == MLX5_MACSEC_EPN) {
+ param.data_offset = MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
+ param.bitwise_data = BIT_ULL(54);
+ param.data_mask = param.bitwise_data;
+ }
+ macsec_aso_build_wqe_ctrl_seg(aso, aso_ctrl, &param);
+}
+
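+/* Re-arm the EPN event for a MACsec object by posting an ASO WQE that sets a
+ * single control bit in the object's ASO context using the bitwise 64-bit
+ * data/mask mode. Called after the driver has consumed an EPN event and
+ * updated epn_msb/overlap.
+ */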
+static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_aso_in *in)
+{
+ struct mlx5e_macsec_aso *aso;
+ struct mlx5_aso_wqe *aso_wqe;
+ struct mlx5_aso *maso;
+ int err;
+
+ aso = &macsec->aso;
+ maso = aso->maso;
+
+ mutex_lock(&aso->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(maso);
+ mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC);
+ macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in);
+ mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl);
+ err = mlx5_aso_poll_cq(maso, false);
+ mutex_unlock(&aso->aso_lock);
+
+ return err;
+}
+
+static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec,
+ struct mlx5e_macsec_aso_in *in, struct mlx5e_macsec_aso_out *out)
+{
+ struct mlx5e_macsec_aso *aso;
+ struct mlx5_aso_wqe *aso_wqe;
+ struct mlx5_aso *maso;
+ unsigned long expires;
+ int err;
+
+ aso = &macsec->aso;
+ maso = aso->maso;
+
+ mutex_lock(&aso->aso_lock);
+
+ aso_wqe = mlx5_aso_get_wqe(maso);
+ mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC);
+ macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL);
+
+ mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl);
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ err = mlx5_aso_poll_cq(maso, false);
+ if (err)
+ usleep_range(2, 10);
+ } while (err && time_is_after_jiffies(expires));
+
+ if (err)
+ goto err_out;
+
+ if (MLX5_GET(macsec_aso, aso->umr->ctx, epn_event_arm))
+ out->event_arm |= MLX5E_ASO_EPN_ARM;
+
+ out->mode_param = MLX5_GET(macsec_aso, aso->umr->ctx, mode_parameter);
+
+err_out:
+ mutex_unlock(&aso->aso_lock);
+ return err;
+}
+
+static struct mlx5e_macsec_sa *get_macsec_tx_sa_from_obj_id(const struct mlx5e_macsec *macsec,
+ const u32 obj_id)
+{
+ const struct list_head *device_list;
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec_device *iter;
+ int i;
+
+ device_list = &macsec->macsec_device_list_head;
+
+ list_for_each_entry(iter, device_list, macsec_device_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ macsec_sa = iter->tx_sa[i];
+ if (!macsec_sa || !macsec_sa->active)
+ continue;
+ if (macsec_sa->macsec_obj_id == obj_id)
+ return macsec_sa;
+ }
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_macsec_sa *get_macsec_rx_sa_from_obj_id(const struct mlx5e_macsec *macsec,
+ const u32 obj_id)
+{
+ const struct list_head *device_list, *sc_list;
+ struct mlx5e_macsec_rx_sc *mlx5e_rx_sc;
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec_device *iter;
+ int i;
+
+ device_list = &macsec->macsec_device_list_head;
+
+ list_for_each_entry(iter, device_list, macsec_device_list_element) {
+ sc_list = &iter->macsec_rx_sc_list_head;
+ list_for_each_entry(mlx5e_rx_sc, sc_list, rx_sc_list_element) {
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ macsec_sa = mlx5e_rx_sc->rx_sa[i];
+ if (!macsec_sa || !macsec_sa->active)
+ continue;
+ if (macsec_sa->macsec_obj_id == obj_id)
+ return macsec_sa;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void macsec_epn_update(struct mlx5e_macsec *macsec, struct mlx5_core_dev *mdev,
+ struct mlx5e_macsec_sa *sa, u32 obj_id, u32 mode_param)
+{
+ struct mlx5_macsec_obj_attrs attrs = {};
+ struct mlx5e_macsec_aso_in in = {};
+
+ /* When the bottom of the replay protection window (mode_param) crosses
+ * 2^31 (half sequence number wraparound), i.e. mode_param becomes greater
+ * than MLX5_MACSEC_EPN_SCOPE_MID, SW should update esn_overlap to OLD (1).
+ * When it crosses 2^32 (full sequence number wraparound), mode_param wraps
+ * back below MLX5_MACSEC_EPN_SCOPE_MID; SW should then update esn_overlap
+ * to NEW (0) and increment esn_msb.
+ */
+
+ if (mode_param < MLX5_MACSEC_EPN_SCOPE_MID) {
+ sa->epn_state.epn_msb++;
+ sa->epn_state.overlap = 0;
+ } else {
+ sa->epn_state.overlap = 1;
+ }
+
+ macsec_build_accel_attrs(sa, &attrs);
+ mlx5e_macsec_modify_obj(mdev, &attrs, obj_id);
+
+ /* Re-set EPN arm event */
+ in.obj_id = obj_id;
+ in.mode = MLX5_MACSEC_EPN;
+ macsec_aso_set_arm_event(mdev, macsec, &in);
+}
+
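+/* Work handler for MACsec object-change events. Under the macsec lock, the SA
+ * owning the object id is looked up (Tx first, then Rx), the ASO context is
+ * read back via macsec_aso_query(), and, if the SA uses EPN and the EPN arm
+ * bit is no longer set, macsec_epn_update() advances epn_msb/overlap and
+ * re-arms the event.
+ */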
+static void macsec_async_event(struct work_struct *work)
+{
+ struct mlx5e_macsec_async_work *async_work;
+ struct mlx5e_macsec_aso_out out = {};
+ struct mlx5e_macsec_aso_in in = {};
+ struct mlx5e_macsec_sa *macsec_sa;
+ struct mlx5e_macsec *macsec;
+ struct mlx5_core_dev *mdev;
+ u32 obj_id;
+
+ async_work = container_of(work, struct mlx5e_macsec_async_work, work);
+ macsec = async_work->macsec;
+ mutex_lock(&macsec->lock);
+
+ mdev = async_work->mdev;
+ obj_id = async_work->obj_id;
+ macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id);
+ if (!macsec_sa) {
+ macsec_sa = get_macsec_rx_sa_from_obj_id(macsec, obj_id);
+ if (!macsec_sa) {
+ mlx5_core_dbg(mdev, "MACsec SA is not found (SA object id %d)\n", obj_id);
+ goto out_async_work;
+ }
+ }
+
+ /* Query MACsec ASO context */
+ in.obj_id = obj_id;
+ macsec_aso_query(mdev, macsec, &in, &out);
+
+ /* EPN case */
+ if (macsec_sa->epn_state.epn_enabled && !(out.event_arm & MLX5E_ASO_EPN_ARM))
+ macsec_epn_update(macsec, mdev, macsec_sa, obj_id, out.mode_param);
+
+out_async_work:
+ kfree(async_work);
+ mutex_unlock(&macsec->lock);
+}
+
+static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_macsec *macsec = container_of(nb, struct mlx5e_macsec, nb);
+ struct mlx5e_macsec_async_work *async_work;
+ struct mlx5_eqe_obj_change *obj_change;
+ struct mlx5_eqe *eqe = data;
+ u16 obj_type;
+ u32 obj_id;
+
+ if (event != MLX5_EVENT_TYPE_OBJECT_CHANGE)
+ return NOTIFY_DONE;
+
+ obj_change = &eqe->data.obj_change;
+ obj_type = be16_to_cpu(obj_change->obj_type);
+ obj_id = be32_to_cpu(obj_change->obj_id);
+
+ if (obj_type != MLX5_GENERAL_OBJECT_TYPES_MACSEC)
+ return NOTIFY_DONE;
+
+ async_work = kzalloc(sizeof(*async_work), GFP_ATOMIC);
+ if (!async_work)
+ return NOTIFY_DONE;
+
+ async_work->macsec = macsec;
+ async_work->mdev = macsec->mdev;
+ async_work->obj_id = obj_id;
+
+ INIT_WORK(&async_work->work, macsec_async_event);
+
+ WARN_ON(!queue_work(macsec->wq, &async_work->work));
+
+ return NOTIFY_OK;
+}
+
+static int mlx5e_macsec_aso_init(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev)
+{
+ struct mlx5_aso *maso;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &aso->pdn);
+ if (err) {
+ mlx5_core_err(mdev,
+ "MACsec offload: Failed to alloc pd for MACsec ASO, err=%d\n",
+ err);
+ return err;
+ }
+
+ maso = mlx5_aso_create(mdev, aso->pdn);
+ if (IS_ERR(maso)) {
+ err = PTR_ERR(maso);
+ goto err_aso;
+ }
+
+ err = mlx5e_macsec_aso_reg_mr(mdev, aso);
+ if (err)
+ goto err_aso_reg;
+
+ mutex_init(&aso->aso_lock);
+
+ aso->maso = maso;
+
+ return 0;
+
+err_aso_reg:
+ mlx5_aso_destroy(maso);
+err_aso:
+ mlx5_core_dealloc_pd(mdev, aso->pdn);
+ return err;
+}
+
+static void mlx5e_macsec_aso_cleanup(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev)
+{
+ if (!aso)
+ return;
+
+ mlx5e_macsec_aso_dereg_mr(mdev, aso);
+
+ mlx5_aso_destroy(aso->maso);
+
+ mlx5_core_dealloc_pd(mdev, aso->pdn);
+}
+
+bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev)
+{
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt))
+ return false;
+
+ return true;
+}
+
+void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats)
+{
+ mlx5e_macsec_fs_get_stats_fill(macsec->macsec_fs, macsec_stats);
+}
+
+struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec)
+{
+ if (!macsec)
+ return NULL;
+
+ return &macsec->stats;
+}
+
+static const struct macsec_ops macsec_offload_ops = {
+ .mdo_add_txsa = mlx5e_macsec_add_txsa,
+ .mdo_upd_txsa = mlx5e_macsec_upd_txsa,
+ .mdo_del_txsa = mlx5e_macsec_del_txsa,
+ .mdo_add_rxsc = mlx5e_macsec_add_rxsc,
+ .mdo_upd_rxsc = mlx5e_macsec_upd_rxsc,
+ .mdo_del_rxsc = mlx5e_macsec_del_rxsc,
+ .mdo_add_rxsa = mlx5e_macsec_add_rxsa,
+ .mdo_upd_rxsa = mlx5e_macsec_upd_rxsa,
+ .mdo_del_rxsa = mlx5e_macsec_del_rxsa,
+ .mdo_add_secy = mlx5e_macsec_add_secy,
+ .mdo_upd_secy = mlx5e_macsec_upd_secy,
+ .mdo_del_secy = mlx5e_macsec_del_secy,
+};
+
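+/* Tx datapath hooks. The skb's MACsec metadata_dst carries the SCI; it is
+ * resolved to an fs_id through the SCI hash table and written into the WQE
+ * Ethernet segment flow-table metadata (marker | fs_id << 2) so the egress
+ * steering rules can match the correct SA. Packets whose SCI has no offloaded
+ * SA are dropped in mlx5e_macsec_handle_tx_skb().
+ */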
+bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ u32 fs_id;
+
+ fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci);
+ if (!fs_id)
+ goto err_out;
+
+ return true;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return false;
+}
+
+void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ u32 fs_id;
+
+ fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci);
+ if (!fs_id)
+ return;
+
+ eseg->flow_table_metadata = cpu_to_be32(MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2);
+}
+
+void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{
+ struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element;
+ u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_macsec_rx_sc *rx_sc;
+ struct mlx5e_macsec *macsec;
+ u32 fs_id;
+
+ macsec = priv->macsec;
+ if (!macsec)
+ return;
+
+ fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data);
+
+ rcu_read_lock();
+ sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id);
+ rx_sc = sc_xarray_element->rx_sc;
+ if (rx_sc) {
+ dst_hold(&rx_sc->md_dst->dst);
+ skb_dst_set(skb, &rx_sc->md_dst->dst);
+ }
+
+ rcu_read_unlock();
+}
+
+void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return;
+
+ /* Enable MACsec */
+ mlx5_core_dbg(priv->mdev, "mlx5e: MACsec acceleration enabled\n");
+ netdev->macsec_ops = &macsec_offload_ops;
+ netdev->features |= NETIF_F_HW_MACSEC;
+ netif_keep_dst(netdev);
+}
+
+int mlx5e_macsec_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_macsec *macsec = NULL;
+ struct mlx5e_macsec_fs *macsec_fs;
+ int err;
+
+ if (!mlx5e_is_macsec_device(priv->mdev)) {
+ mlx5_core_dbg(mdev, "Not a MACsec offload device\n");
+ return 0;
+ }
+
+ macsec = kzalloc(sizeof(*macsec), GFP_KERNEL);
+ if (!macsec)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&macsec->macsec_device_list_head);
+ mutex_init(&macsec->lock);
+
+ err = rhashtable_init(&macsec->sci_hash, &rhash_sci);
+ if (err) {
+ mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n",
+ err);
+ goto err_hash;
+ }
+
+ err = mlx5e_macsec_aso_init(&macsec->aso, priv->mdev);
+ if (err) {
+ mlx5_core_err(mdev, "MACsec offload: Failed to init aso, err=%d\n", err);
+ goto err_aso;
+ }
+
+ macsec->wq = alloc_ordered_workqueue("mlx5e_macsec_%s", 0, priv->netdev->name);
+ if (!macsec->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ xa_init_flags(&macsec->sc_xarray, XA_FLAGS_ALLOC1);
+
+ priv->macsec = macsec;
+
+ macsec->mdev = mdev;
+
+ macsec_fs = mlx5e_macsec_fs_init(mdev, priv->netdev);
+ if (!macsec_fs) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ macsec->macsec_fs = macsec_fs;
+
+ macsec->nb.notifier_call = macsec_obj_change_event;
+ mlx5_notifier_register(mdev, &macsec->nb);
+
+ mlx5_core_dbg(mdev, "MACsec attached to netdevice\n");
+
+ return 0;
+
+err_out:
+ destroy_workqueue(macsec->wq);
+err_wq:
+ mlx5e_macsec_aso_cleanup(&macsec->aso, priv->mdev);
+err_aso:
+ rhashtable_destroy(&macsec->sci_hash);
+err_hash:
+ kfree(macsec);
+ priv->macsec = NULL;
+ return err;
+}
+
+void mlx5e_macsec_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_macsec *macsec = priv->macsec;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!macsec)
+ return;
+
+ mlx5_notifier_unregister(mdev, &macsec->nb);
+ mlx5e_macsec_fs_cleanup(macsec->macsec_fs);
+ destroy_workqueue(macsec->wq);
+ mlx5e_macsec_aso_cleanup(&macsec->aso, mdev);
+ rhashtable_destroy(&macsec->sci_hash);
+ mutex_destroy(&macsec->lock);
+ kfree(macsec);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
new file mode 100644
index 000000000..347380a2c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_ACCEL_MACSEC_H__
+#define __MLX5_EN_ACCEL_MACSEC_H__
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+
+#include <linux/mlx5/driver.h>
+#include <net/macsec.h>
+#include <net/dst_metadata.h>
+
+/* Bits 31-30: MACsec marker, bits 15-0: MACsec id */
+#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */
+#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX
+#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1)
+#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK)
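+/* Example Rx metadata decode (illustrative value): metadata 0x40000005 has
+ * bits 31-30 == 0x1, so MLX5_MACSEC_METADATA_MARKER() identifies it as MACsec
+ * traffic, and MLX5_MACSEC_RX_METADAT_HANDLE() extracts fs_id 0x5, which
+ * indexes the Rx SC xarray.
+ */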
+
+struct mlx5e_priv;
+struct mlx5e_macsec;
+
+struct mlx5e_macsec_stats {
+ u64 macsec_rx_pkts;
+ u64 macsec_rx_bytes;
+ u64 macsec_rx_pkts_drop;
+ u64 macsec_rx_bytes_drop;
+ u64 macsec_tx_pkts;
+ u64 macsec_tx_bytes;
+ u64 macsec_tx_pkts_drop;
+ u64 macsec_tx_bytes_drop;
+};
+
+void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_macsec_init(struct mlx5e_priv *priv);
+void mlx5e_macsec_cleanup(struct mlx5e_priv *priv);
+bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb);
+void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg);
+
+static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+ return md_dst && (md_dst->type == METADATA_MACSEC);
+}
+
+static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe)
+{
+ return MLX5_MACSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
+}
+
+void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe);
+bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev);
+void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats);
+struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec);
+
+#else
+
+static inline void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) {}
+static inline int mlx5e_macsec_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) {}
+static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) { return false; }
+static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; }
+static inline void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe)
+{}
+static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) { return false; }
+#endif /* CONFIG_MLX5_EN_MACSEC */
+
+#endif /* __MLX5_EN_ACCEL_MACSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
new file mode 100644
index 000000000..6ecf0bf23
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -0,0 +1,1390 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/macsec.h>
+#include <linux/netdevice.h>
+#include <linux/mlx5/qp.h>
+#include "fs_core.h"
+#include "en/fs.h"
+#include "en_accel/macsec_fs.h"
+#include "mlx5_core.h"
+
+/* MACsec TX flow steering */
+#define CRYPTO_NUM_MAXSEC_FTE BIT(15)
+#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1
+
+#define TX_CRYPTO_TABLE_LEVEL 0
+#define TX_CRYPTO_TABLE_NUM_GROUPS 3
+#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1
+#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \
+ (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \
+ CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE))
+#define TX_CHECK_TABLE_LEVEL 1
+#define TX_CHECK_TABLE_NUM_FTE 2
+#define RX_CRYPTO_TABLE_LEVEL 0
+#define RX_CHECK_TABLE_LEVEL 1
+#define RX_CHECK_TABLE_NUM_FTE 3
+#define RX_CRYPTO_TABLE_NUM_GROUPS 3
+#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \
+ ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2)
+#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \
+ (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE)
+#define RX_NUM_OF_RULES_PER_SA 2
+
+#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET)
+#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8
+#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN)
+
+/* MACsec RX flow steering */
+#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E
+
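+/* The Tx SA steering match key is carried in metadata_reg_a of the WQE
+ * Ethernet segment: the MACsec marker plus the per-SA fs_id shifted left by 2
+ * (see mlx5e_macsec_tx_build_eseg() and macsec_fs_tx_setup_fte()); the
+ * crypto-table SA group matches it under MLX5_ETH_WQE_FT_META_MACSEC_MASK.
+ */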
+struct mlx5_sectag_header {
+ __be16 ethertype;
+ u8 tci_an;
+ u8 sl;
+ u32 pn;
+ u8 sci[MACSEC_SCI_LEN]; /* optional */
+} __packed;
+
+struct mlx5e_macsec_tx_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ u32 fs_id;
+};
+
+struct mlx5e_macsec_tables {
+ struct mlx5e_flow_table ft_crypto;
+ struct mlx5_flow_handle *crypto_miss_rule;
+
+ struct mlx5_flow_table *ft_check;
+ struct mlx5_flow_group *ft_check_group;
+ struct mlx5_fc *check_miss_rule_counter;
+ struct mlx5_flow_handle *check_miss_rule;
+ struct mlx5_fc *check_rule_counter;
+
+ u32 refcnt;
+};
+
+struct mlx5e_macsec_tx {
+ struct mlx5_flow_handle *crypto_mke_rule;
+ struct mlx5_flow_handle *check_rule;
+
+ struct ida tx_halloc;
+
+ struct mlx5e_macsec_tables tables;
+};
+
+struct mlx5e_macsec_rx_rule {
+ struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA];
+ struct mlx5_modify_hdr *meta_modhdr;
+};
+
+struct mlx5e_macsec_rx {
+ struct mlx5_flow_handle *check_rule[2];
+ struct mlx5_pkt_reformat *check_rule_pkt_reformat[2];
+
+ struct mlx5e_macsec_tables tables;
+};
+
+union mlx5e_macsec_rule {
+ struct mlx5e_macsec_tx_rule tx_rule;
+ struct mlx5e_macsec_rx_rule rx_rule;
+};
+
+struct mlx5e_macsec_fs {
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_macsec_tx *tx_fs;
+ struct mlx5e_macsec_rx *rx_fs;
+};
+
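+/* Tx flow-steering layout, in outline: a crypto table at level 0 holds an MKE
+ * group (EAPOL/MKE frames bypass offload), a per-SA group matched on the WQE
+ * metadata (fs_ids allocated from the tx_halloc IDA), and a default-allow
+ * miss group; encrypted traffic is then forwarded to a check table at level 1
+ * that counts packets whose check metadata (metadata_reg_c_4) is zero and
+ * drops and counts everything else.
+ */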
+static void macsec_fs_tx_destroy(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5e_macsec_tables *tx_tables;
+
+ tx_tables = &tx_fs->tables;
+
+ /* Tx check table */
+ if (tx_fs->check_rule) {
+ mlx5_del_flow_rules(tx_fs->check_rule);
+ tx_fs->check_rule = NULL;
+ }
+
+ if (tx_tables->check_miss_rule) {
+ mlx5_del_flow_rules(tx_tables->check_miss_rule);
+ tx_tables->check_miss_rule = NULL;
+ }
+
+ if (tx_tables->ft_check_group) {
+ mlx5_destroy_flow_group(tx_tables->ft_check_group);
+ tx_tables->ft_check_group = NULL;
+ }
+
+ if (tx_tables->ft_check) {
+ mlx5_destroy_flow_table(tx_tables->ft_check);
+ tx_tables->ft_check = NULL;
+ }
+
+ /* Tx crypto table */
+ if (tx_fs->crypto_mke_rule) {
+ mlx5_del_flow_rules(tx_fs->crypto_mke_rule);
+ tx_fs->crypto_mke_rule = NULL;
+ }
+
+ if (tx_tables->crypto_miss_rule) {
+ mlx5_del_flow_rules(tx_tables->crypto_miss_rule);
+ tx_tables->crypto_miss_rule = NULL;
+ }
+
+ mlx5e_destroy_flow_table(&tx_tables->ft_crypto);
+}
+
+static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+
+ if (!in) {
+ kfree(ft->g);
+ ft->g = NULL;
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ /* Flow Group for MKE match */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for SA rules */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+ MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_table
+ *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags,
+ int level, int max_fte)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *fdb = NULL;
+
+ /* reserve entry for the match all miss group and rule */
+ ft_attr.autogroup.num_reserved_entries = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.prio = 0;
+ ft_attr.flags = flags;
+ ft_attr.level = level;
+ ft_attr.max_fte = max_fte;
+
+ fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+
+ return fdb;
+}
+
+static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_table *flow_table;
+ struct mlx5_flow_group *flow_group;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err;
+
+ ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+ if (!ns)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto out_spec;
+ }
+
+ tx_tables = &tx_fs->tables;
+ ft_crypto = &tx_tables->ft_crypto;
+
+ /* Tx crypto table */
+ ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft_attr.level = TX_CRYPTO_TABLE_LEVEL;
+ ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+ flow_table = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec Tx crypto table err(%d)\n", err);
+ goto out_flow_group;
+ }
+ ft_crypto->t = flow_table;
+
+ /* Tx crypto table groups */
+ err = macsec_fs_tx_create_crypto_table_groups(ft_crypto);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+
+ /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */
+ memset(&flow_act, 0, sizeof(flow_act));
+ memset(spec, 0, sizeof(*spec));
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec TX MKE rule, err=%d\n", err);
+ goto err;
+ }
+ tx_fs->crypto_mke_rule = rule;
+
+ /* Tx crypto table Default miss rule */
+ memset(&flow_act, 0, sizeof(flow_act));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Tx table default miss rule %d\n", err);
+ goto err;
+ }
+ tx_tables->crypto_miss_rule = rule;
+
+ /* Tx check table */
+ flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL,
+ TX_CHECK_TABLE_NUM_FTE);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "fail to create MACsec TX check table, err(%d)\n", err);
+ goto err;
+ }
+ tx_tables->ft_check = flow_table;
+
+ /* Tx check table Default miss group/rule */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+ flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in);
+ if (IS_ERR(flow_group)) {
+ err = PTR_ERR(flow_group);
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+ tx_tables->ft_check_group = flow_group;
+
+ /* Tx check table default drop rule */
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to added MACsec tx check drop rule, err(%d)\n", err);
+ goto err;
+ }
+ tx_tables->check_miss_rule = rule;
+
+ /* Tx check table rule */
+ memset(spec, 0, sizeof(struct mlx5_flow_spec));
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ flow_act.flags = FLOW_ACT_NO_APPEND;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter);
+ rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec check rule, err=%d\n", err);
+ goto err;
+ }
+ tx_fs->check_rule = rule;
+
+ goto out_flow_group;
+
+err:
+ macsec_fs_tx_destroy(macsec_fs);
+out_flow_group:
+ kvfree(flow_group_in);
+out_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int macsec_fs_tx_ft_get(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5e_macsec_tables *tx_tables;
+ int err = 0;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt)
+ goto out;
+
+ err = macsec_fs_tx_create(macsec_fs);
+ if (err)
+ return err;
+
+out:
+ tx_tables->refcnt++;
+ return err;
+}
+
+static void macsec_fs_tx_ft_put(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+
+ if (--tx_tables->refcnt)
+ return;
+
+ macsec_fs_tx_destroy(macsec_fs);
+}
+
+static int macsec_fs_tx_setup_fte(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ u32 macsec_obj_id,
+ u32 *fs_id)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ int err = 0;
+ u32 id;
+
+ err = ida_alloc_range(&tx_fs->tx_halloc, 1,
+ MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES,
+ GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ id = err;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ /* Metadata match */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_MACSEC | id << 2);
+
+ *fs_id = id;
+ flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+ flow_act->crypto.obj_id = macsec_obj_id;
+
+ mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id);
+ return 0;
+}
+
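+/* Build the SecTAG template used for Tx packet reformat: ethertype 0x88E5,
+ * TCI/AN bits derived from the SecY/Tx SC (SC bit plus the SCI when
+ * macsec_send_sci() says it must be sent, otherwise ES/SCB as configured,
+ * C/E according to the encrypt setting) and the encoding association number.
+ * The resulting header is 8 bytes, or 16 bytes when the SCI is included.
+ */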
+static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx,
+ char *reformatbf,
+ size_t *reformat_size)
+{
+ const struct macsec_secy *secy = ctx->secy;
+ bool sci_present = macsec_send_sci(secy);
+ struct mlx5_sectag_header sectag = {};
+ const struct macsec_tx_sc *tx_sc;
+
+ tx_sc = &secy->tx_sc;
+ sectag.ethertype = htons(ETH_P_MACSEC);
+
+ if (sci_present) {
+ sectag.tci_an |= MACSEC_TCI_SC;
+ memcpy(&sectag.sci, &secy->sci,
+ sizeof(sectag.sci));
+ } else {
+ if (tx_sc->end_station)
+ sectag.tci_an |= MACSEC_TCI_ES;
+ if (tx_sc->scb)
+ sectag.tci_an |= MACSEC_TCI_SCB;
+ }
+
+ /* With GCM, C/E clear for !encrypt, both set for encrypt */
+ if (tx_sc->encrypt)
+ sectag.tci_an |= MACSEC_TCI_CONFID;
+ else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN)
+ sectag.tci_an |= MACSEC_TCI_C;
+
+ sectag.tci_an |= tx_sc->encoding_sa;
+
+ *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0);
+
+ memcpy(reformatbf, &sectag, *reformat_size);
+}
+
+static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5e_macsec_tx_rule *tx_rule)
+{
+ if (tx_rule->rule) {
+ mlx5_del_flow_rules(tx_rule->rule);
+ tx_rule->rule = NULL;
+ }
+
+ if (tx_rule->pkt_reformat) {
+ mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat);
+ tx_rule->pkt_reformat = NULL;
+ }
+
+ if (tx_rule->fs_id) {
+ ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id);
+ tx_rule->fs_id = 0;
+ }
+
+ kfree(tx_rule);
+
+ macsec_fs_tx_ft_put(macsec_fs);
+}
+
+static union mlx5e_macsec_rule *
+macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id)
+{
+ char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN];
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ union mlx5e_macsec_rule *macsec_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5e_macsec_tx_rule *tx_rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ size_t reformat_size;
+ int err = 0;
+ u32 fs_id;
+
+ tx_tables = &tx_fs->tables;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ err = macsec_fs_tx_ft_get(macsec_fs);
+ if (err)
+ goto out_spec;
+
+ macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+ if (!macsec_rule) {
+ macsec_fs_tx_ft_put(macsec_fs);
+ goto out_spec;
+ }
+
+ tx_rule = &macsec_rule->tx_rule;
+
+ /* Tx crypto table crypto rule */
+ macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size);
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC;
+ reformat_params.size = reformat_size;
+ reformat_params.data = reformatbf;
+ flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+ if (IS_ERR(flow_act.pkt_reformat)) {
+ err = PTR_ERR(flow_act.pkt_reformat);
+ netdev_err(netdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err);
+ goto err;
+ }
+ tx_rule->pkt_reformat = flow_act.pkt_reformat;
+
+ err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, &fs_id);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to add packet reformat for MACsec TX crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+
+ tx_rule->fs_id = fs_id;
+ *sa_fs_id = fs_id;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = tx_tables->ft_check;
+ rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec TX crypto rule, err=%d\n", err);
+ goto err;
+ }
+ tx_rule->rule = rule;
+
+ goto out_spec;
+
+err:
+ macsec_fs_tx_del_rule(macsec_fs, tx_rule);
+ macsec_rule = NULL;
+out_spec:
+ kvfree(spec);
+
+ return macsec_rule;
+}
+
+static void macsec_fs_tx_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *tx_tables;
+
+ if (!tx_fs)
+ return;
+
+ tx_tables = &tx_fs->tables;
+ if (tx_tables->refcnt) {
+ netdev_err(macsec_fs->netdev,
+ "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n",
+ tx_tables->refcnt);
+ return;
+ }
+
+ ida_destroy(&tx_fs->tx_halloc);
+
+ if (tx_tables->check_miss_rule_counter) {
+ mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter);
+ tx_tables->check_miss_rule_counter = NULL;
+ }
+
+ if (tx_tables->check_rule_counter) {
+ mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+ tx_tables->check_rule_counter = NULL;
+ }
+
+ kfree(tx_fs);
+ macsec_fs->tx_fs = NULL;
+}
+
+static int macsec_fs_tx_init(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *tx_tables;
+ struct mlx5e_macsec_tx *tx_fs;
+ struct mlx5_fc *flow_counter;
+ int err;
+
+ tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL);
+ if (!tx_fs)
+ return -ENOMEM;
+
+ tx_tables = &tx_fs->tables;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Tx encrypt flow counter, err(%d)\n",
+ err);
+ goto err_encrypt_counter;
+ }
+ tx_tables->check_rule_counter = flow_counter;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Tx drop flow counter, err(%d)\n",
+ err);
+ goto err_drop_counter;
+ }
+ tx_tables->check_miss_rule_counter = flow_counter;
+
+ ida_init(&tx_fs->tx_halloc);
+
+ macsec_fs->tx_fs = tx_fs;
+
+ return 0;
+
+err_drop_counter:
+ mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+ tx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+ kfree(tx_fs);
+ macsec_fs->tx_fs = NULL;
+
+ return err;
+}
+
+static void macsec_fs_rx_destroy(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct mlx5e_macsec_tables *rx_tables;
+ int i;
+
+ /* Rx check table */
+ for (i = 1; i >= 0; --i) {
+ if (rx_fs->check_rule[i]) {
+ mlx5_del_flow_rules(rx_fs->check_rule[i]);
+ rx_fs->check_rule[i] = NULL;
+ }
+
+ if (rx_fs->check_rule_pkt_reformat[i]) {
+ mlx5_packet_reformat_dealloc(macsec_fs->mdev,
+ rx_fs->check_rule_pkt_reformat[i]);
+ rx_fs->check_rule_pkt_reformat[i] = NULL;
+ }
+ }
+
+ rx_tables = &rx_fs->tables;
+
+ if (rx_tables->check_miss_rule) {
+ mlx5_del_flow_rules(rx_tables->check_miss_rule);
+ rx_tables->check_miss_rule = NULL;
+ }
+
+ if (rx_tables->ft_check_group) {
+ mlx5_destroy_flow_group(rx_tables->ft_check_group);
+ rx_tables->ft_check_group = NULL;
+ }
+
+ if (rx_tables->ft_check) {
+ mlx5_destroy_flow_table(rx_tables->ft_check);
+ rx_tables->ft_check = NULL;
+ }
+
+ /* Rx crypto table */
+ if (rx_tables->crypto_miss_rule) {
+ mlx5_del_flow_rules(rx_tables->crypto_miss_rule);
+ rx_tables->crypto_miss_rule = NULL;
+ }
+
+ mlx5e_destroy_flow_table(&rx_tables->ft_crypto);
+}
+
+static int macsec_fs_rx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ /* Flow group for SA rule with SCI */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS_5);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK <<
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+ MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2);
+ MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow group for SA rule without SCI */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS_5);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
+ memset(mc, 0, mclen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static int macsec_fs_rx_create_check_decap_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec,
+ int reformat_param_size)
+{
+ int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 0 : 1;
+ u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI];
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5_flow_handle *rule;
+ int err = 0;
+
+ rx_tables = &rx_fs->tables;
+
+ /* Rx check table decap 16B rule */
+ memset(dest, 0, sizeof(*dest));
+ memset(flow_act, 0, sizeof(*flow_act));
+ memset(spec, 0, sizeof(*spec));
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC;
+ reformat_params.size = reformat_param_size;
+ reformat_params.data = mlx5_reformat_buf;
+ flow_act->pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+ if (IS_ERR(flow_act->pkt_reformat)) {
+ err = PTR_ERR(flow_act->pkt_reformat);
+ netdev_err(netdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err);
+ return err;
+ }
+ rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat;
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ /* MACsec syndrome match */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0);
+ /* ASO return reg syndrome match */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+ /* Sectag TCI SC present bit */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI)
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT <<
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ flow_act->flags = FLOW_ACT_NO_APPEND;
+ flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest->type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest->counter_id = mlx5_fc_id(rx_tables->check_rule_counter);
+ rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to add MACsec Rx check rule, err=%d\n", err);
+ return err;
+ }
+
+ rx_fs->check_rule[rule_index] = rule;
+
+ return 0;
+}
+
+static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_table *flow_table;
+ struct mlx5_flow_group *flow_group;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 *flow_group_in;
+ int err;
+
+ ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+ if (!ns)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto free_spec;
+ }
+
+ rx_tables = &rx_fs->tables;
+ ft_crypto = &rx_tables->ft_crypto;
+
+ /* Rx crypto table */
+ ft_attr.level = RX_CRYPTO_TABLE_LEVEL;
+ ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+ flow_table = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "Failed to create MACsec Rx crypto table err(%d)\n", err);
+ goto out_flow_group;
+ }
+ ft_crypto->t = flow_table;
+
+ /* Rx crypto table groups */
+ err = macsec_fs_rx_create_crypto_table_groups(ft_crypto);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+ err);
+ goto err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add MACsec Rx crypto table default miss rule %d\n",
+ err);
+ goto err;
+ }
+ rx_tables->crypto_miss_rule = rule;
+
+ /* Rx check table */
+ flow_table = macsec_fs_auto_group_table_create(ns,
+ MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT,
+ RX_CHECK_TABLE_LEVEL,
+ RX_CHECK_TABLE_NUM_FTE);
+ if (IS_ERR(flow_table)) {
+ err = PTR_ERR(flow_table);
+ netdev_err(netdev, "fail to create MACsec RX check table, err(%d)\n", err);
+ goto err;
+ }
+ rx_tables->ft_check = flow_table;
+
+ /* Rx check table Default miss group/rule */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+ flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in);
+ if (IS_ERR(flow_group)) {
+ err = PTR_ERR(flow_group);
+ netdev_err(netdev,
+ "Failed to create default flow group for MACsec Rx check table err(%d)\n",
+ err);
+ goto err;
+ }
+ rx_tables->ft_check_group = flow_group;
+
+ /* Rx check table default drop rule */
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev, "Failed to added MACsec Rx check drop rule, err(%d)\n", err);
+ goto err;
+ }
+ rx_tables->check_miss_rule = rule;
+
+ /* Rx check table decap rules */
+ err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+ MLX5_SECTAG_HEADER_SIZE_WITH_SCI);
+ if (err)
+ goto err;
+
+ err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+ MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI);
+ if (err)
+ goto err;
+
+ goto out_flow_group;
+
+err:
+ macsec_fs_rx_destroy(macsec_fs);
+out_flow_group:
+ kvfree(flow_group_in);
+free_spec:
+ kvfree(spec);
+ return err;
+}
+
+static int macsec_fs_rx_ft_get(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+ int err = 0;
+
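+ /* The Rx tables are created lazily on the first reference and shared
+ * by all following SAs via the refcount below.
+ */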
+ if (rx_tables->refcnt)
+ goto out;
+
+ err = macsec_fs_rx_create(macsec_fs);
+ if (err)
+ return err;
+
+out:
+ rx_tables->refcnt++;
+ return err;
+}
+
+static void macsec_fs_rx_ft_put(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+
+ if (--rx_tables->refcnt)
+ return;
+
+ macsec_fs_rx_destroy(macsec_fs);
+}
+
+static void macsec_fs_rx_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ struct mlx5e_macsec_rx_rule *rx_rule)
+{
+ int i;
+
+ for (i = 0; i < RX_NUM_OF_RULES_PER_SA; ++i) {
+ if (rx_rule->rule[i]) {
+ mlx5_del_flow_rules(rx_rule->rule[i]);
+ rx_rule->rule[i] = NULL;
+ }
+ }
+
+ if (rx_rule->meta_modhdr) {
+ mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr);
+ rx_rule->meta_modhdr = NULL;
+ }
+
+ kfree(rx_rule);
+
+ macsec_fs_rx_ft_put(macsec_fs);
+}
+
+static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_macsec_rule_attrs *attrs,
+ bool sci_present)
+{
+ u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num;
+ struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto;
+ __be32 *sci_p = (__be32 *)(&attrs->sci);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ /* MACsec ethertype */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+
+ /* Sectag AN + TCI SC present bit */
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+ MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+ tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+ if (sci_present) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters_5.macsec_tag_2);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2,
+ be32_to_cpu(sci_p[0]));
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters_5.macsec_tag_3);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3,
+ be32_to_cpu(sci_p[1]));
+ } else {
+ /* When the SCI isn't present in the Sectag, match on the source MAC
+ * address only if the SCI contains the default MACsec port.
+ */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16),
+ sci_p, ETH_ALEN);
+ }
+
+ crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+ crypto_params->obj_id = attrs->macsec_obj_id;
+}
+
+static union mlx5e_macsec_rule *
+macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 fs_id)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct net_device *netdev = macsec_fs->netdev;
+ union mlx5e_macsec_rule *macsec_rule = NULL;
+ struct mlx5_modify_hdr *modify_hdr = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_macsec_rx_rule *rx_rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5e_flow_table *ft_crypto;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ err = macsec_fs_rx_ft_get(macsec_fs);
+ if (err)
+ goto out_spec;
+
+ macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+ if (!macsec_rule) {
+ macsec_fs_rx_ft_put(macsec_fs);
+ goto out_spec;
+ }
+
+ rx_rule = &macsec_rule->rx_rule;
+ rx_tables = &rx_fs->tables;
+ ft_crypto = &rx_tables->ft_crypto;
+
+ /* Set bits [31:30] to the MACsec marker (0x1) */
+ /* Set bits [15:0] to the fs id */
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30));
+ MLX5_SET(set_action_in, action, offset, 0);
+ MLX5_SET(set_action_in, action, length, 32);
+
+ modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC,
+ 1, action);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ netdev_err(netdev, "fail to alloc MACsec set modify_header_id err=%d\n", err);
+ modify_hdr = NULL;
+ goto err;
+ }
+ rx_rule->meta_modhdr = modify_hdr;
+
+ /* Rx crypto table with SCI rule */
+ macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true);
+
+ flow_act.modify_hdr = modify_hdr;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rx_tables->ft_check;
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+ rx_rule->rule[0] = rule;
+
+ /* Rx crypto table without SCI rule */
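+ /* A second rule that does not match on the SCI is added only when the
+ * SCI carries the default end-station port, in which case the SCI is
+ * implied by the packet's source MAC address.
+ */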
+ if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) {
+ memset(spec, 0, sizeof(struct mlx5_flow_spec));
+ memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+ memset(&flow_act, 0, sizeof(flow_act));
+
+ macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false);
+
+ flow_act.modify_hdr = modify_hdr;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rx_tables->ft_check;
+ rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(netdev,
+ "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n",
+ err);
+ goto err;
+ }
+ rx_rule->rule[1] = rule;
+ }
+
+ kvfree(spec);
+ return macsec_rule;
+
+err:
+ macsec_fs_rx_del_rule(macsec_fs, rx_rule);
+ macsec_rule = NULL;
+out_spec:
+ kvfree(spec);
+ return macsec_rule;
+}
+
+static int macsec_fs_rx_init(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct net_device *netdev = macsec_fs->netdev;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *rx_tables;
+ struct mlx5e_macsec_rx *rx_fs;
+ struct mlx5_fc *flow_counter;
+ int err;
+
+ rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL);
+ if (!rx_fs)
+ return -ENOMEM;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Rx encrypt flow counter, err(%d)\n",
+ err);
+ goto err_encrypt_counter;
+ }
+
+ rx_tables = &rx_fs->tables;
+ rx_tables->check_rule_counter = flow_counter;
+
+ flow_counter = mlx5_fc_create(mdev, false);
+ if (IS_ERR(flow_counter)) {
+ err = PTR_ERR(flow_counter);
+ netdev_err(netdev,
+ "Failed to create MACsec Rx drop flow counter, err(%d)\n",
+ err);
+ goto err_drop_counter;
+ }
+ rx_tables->check_miss_rule_counter = flow_counter;
+
+ macsec_fs->rx_fs = rx_fs;
+
+ return 0;
+
+err_drop_counter:
+ mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+ rx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+ kfree(rx_fs);
+ macsec_fs->rx_fs = NULL;
+
+ return err;
+}
+
+static void macsec_fs_rx_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+ struct mlx5e_macsec_tables *rx_tables;
+
+ if (!rx_fs)
+ return;
+
+ rx_tables = &rx_fs->tables;
+
+ if (rx_tables->refcnt) {
+ netdev_err(macsec_fs->netdev,
+ "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n",
+ rx_tables->refcnt);
+ return;
+ }
+
+ if (rx_tables->check_miss_rule_counter) {
+ mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter);
+ rx_tables->check_miss_rule_counter = NULL;
+ }
+
+ if (rx_tables->check_rule_counter) {
+ mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+ rx_tables->check_rule_counter = NULL;
+ }
+
+ kfree(rx_fs);
+ macsec_fs->rx_fs = NULL;
+}
+
+void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats)
+{
+ struct mlx5e_macsec_stats *stats = (struct mlx5e_macsec_stats *)macsec_stats;
+ struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+ struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+ struct mlx5_core_dev *mdev = macsec_fs->mdev;
+
+ if (tx_tables->check_rule_counter)
+ mlx5_fc_query(mdev, tx_tables->check_rule_counter,
+ &stats->macsec_tx_pkts, &stats->macsec_tx_bytes);
+
+ if (tx_tables->check_miss_rule_counter)
+ mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter,
+ &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop);
+
+ if (rx_tables->check_rule_counter)
+ mlx5_fc_query(mdev, rx_tables->check_rule_counter,
+ &stats->macsec_rx_pkts, &stats->macsec_rx_bytes);
+
+ if (rx_tables->check_miss_rule_counter)
+ mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter,
+ &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop);
+}
+
+union mlx5e_macsec_rule *
+mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *macsec_ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id)
+{
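+ /* sa_fs_id is an output for the Tx (encrypt) path and an input for the Rx path */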
+ return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+ macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) :
+ macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id);
+}
+
+void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ union mlx5e_macsec_rule *macsec_rule,
+ int action)
+{
+ (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+ macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule) :
+ macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule);
+}
+
+void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs)
+{
+ macsec_fs_rx_cleanup(macsec_fs);
+ macsec_fs_tx_cleanup(macsec_fs);
+ kfree(macsec_fs);
+}
+
+struct mlx5e_macsec_fs *
+mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ struct mlx5e_macsec_fs *macsec_fs;
+ int err;
+
+ macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL);
+ if (!macsec_fs)
+ return NULL;
+
+ macsec_fs->mdev = mdev;
+ macsec_fs->netdev = netdev;
+
+ err = macsec_fs_tx_init(macsec_fs);
+ if (err) {
+ netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err);
+ goto err;
+ }
+
+ err = macsec_fs_rx_init(macsec_fs);
+ if (err) {
+ netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err);
+ goto tx_cleanup;
+ }
+
+ return macsec_fs;
+
+tx_cleanup:
+ macsec_fs_tx_cleanup(macsec_fs);
+err:
+ kfree(macsec_fs);
+ return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h
new file mode 100644
index 000000000..b429648d4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_MACSEC_STEERING_H__
+#define __MLX5_MACSEC_STEERING_H__
+
+#ifdef CONFIG_MLX5_EN_MACSEC
+
+#include "en_accel/macsec.h"
+
+#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16
+
+struct mlx5e_macsec_fs;
+union mlx5e_macsec_rule;
+
+struct mlx5_macsec_rule_attrs {
+ sci_t sci;
+ u32 macsec_obj_id;
+ u8 assoc_num;
+ int action;
+};
+
+enum mlx5_macsec_action {
+ MLX5_ACCEL_MACSEC_ACTION_ENCRYPT,
+ MLX5_ACCEL_MACSEC_ACTION_DECRYPT,
+};
+
+void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs);
+
+struct mlx5e_macsec_fs *
+mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, struct net_device *netdev);
+
+union mlx5e_macsec_rule *
+mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs,
+ const struct macsec_context *ctx,
+ struct mlx5_macsec_rule_attrs *attrs,
+ u32 *sa_fs_id);
+
+void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs,
+ union mlx5e_macsec_rule *macsec_rule,
+ int action);
+
+void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats);
+
+#endif
+
+#endif /* __MLX5_MACSEC_STEERING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c
new file mode 100644
index 000000000..e50a2e3f3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "en_accel/macsec.h"
+
+static const struct counter_desc mlx5e_macsec_hw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes_drop) },
+};
+
+#define NUM_MACSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_macsec_hw_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(macsec_hw)
+{
+ if (!priv->macsec)
+ return 0;
+
+ if (mlx5e_is_macsec_device(priv->mdev))
+ return NUM_MACSEC_HW_COUNTERS;
+
+ return 0;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(macsec_hw) {}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw)
+{
+ unsigned int i;
+
+ if (!priv->macsec)
+ return idx;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return idx;
+
+ for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_macsec_hw_stats_desc[i].format);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw)
+{
+ int i;
+
+ if (!priv->macsec)
+ return idx;
+
+ if (!mlx5e_is_macsec_device(priv->mdev))
+ return idx;
+
+ mlx5e_macsec_get_stats_fill(priv->macsec, mlx5e_macsec_get_stats(priv->macsec));
+ for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(mlx5e_macsec_get_stats(priv->macsec),
+ mlx5e_macsec_hw_stats_desc,
+ i);
+
+ return idx;
+}
+
+MLX5E_DEFINE_STATS_GRP(macsec_hw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
new file mode 100644
index 000000000..58eacba6d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -0,0 +1,765 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include <linux/mlx5/fs.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "en.h"
+
+#define ARFS_HASH_SHIFT BITS_PER_BYTE
+#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct arfs_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *default_rule;
+ struct hlist_head rules_hash[ARFS_HASH_SIZE];
+};
+
+enum arfs_type {
+ ARFS_IPV4_TCP,
+ ARFS_IPV6_TCP,
+ ARFS_IPV4_UDP,
+ ARFS_IPV6_UDP,
+ ARFS_NUM_TYPES,
+};
+
+struct mlx5e_arfs_tables {
+ struct arfs_table arfs_tables[ARFS_NUM_TYPES];
+ /* Protect aRFS rules list */
+ spinlock_t arfs_lock;
+ struct list_head rules;
+ int last_filter_id;
+ struct workqueue_struct *wq;
+};
+
+struct arfs_tuple {
+ __be16 etype;
+ u8 ip_proto;
+ union {
+ __be32 src_ipv4;
+ struct in6_addr src_ipv6;
+ };
+ union {
+ __be32 dst_ipv4;
+ struct in6_addr dst_ipv6;
+ };
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+struct arfs_rule {
+ struct mlx5e_priv *priv;
+ struct work_struct arfs_work;
+ struct mlx5_flow_handle *rule;
+ struct hlist_node hlist;
+ int rxq;
+ /* Flow ID passed to ndo_rx_flow_steer */
+ int flow_id;
+ /* Filter ID returned by ndo_rx_flow_steer */
+ int filter_id;
+ struct arfs_tuple tuple;
+};
+
+#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
+ for (i = 0; i < ARFS_NUM_TYPES; i++) \
+ mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)
+
+#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
+ for (j = 0; j < ARFS_HASH_SIZE; j++) \
+ hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
+
+static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
+{
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ return MLX5_TT_IPV4_TCP;
+ case ARFS_IPV4_UDP:
+ return MLX5_TT_IPV4_UDP;
+ case ARFS_IPV6_TCP:
+ return MLX5_TT_IPV6_TCP;
+ case ARFS_IPV6_UDP:
+ return MLX5_TT_IPV6_UDP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int arfs_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err, i;
+
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ /* Modify ttc rules destination back to their default */
+ err = mlx5_ttc_fwd_default_dest(ttc, arfs_get_tt(i));
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, arfs_get_tt(i), err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void arfs_del_rules(struct mlx5e_flow_steering *fs);
+
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+{
+ /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile
+ * cleanup_rx callback and it is not recreated when
+ * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering()
+ * is not called by the uplink_rep profile init_rx callback. Thus, if
+ * ntuple is set, moving to switchdev flow will enter this function
+ * with fs->arfs nullified.
+ */
+ if (!mlx5e_fs_get_arfs(fs))
+ return 0;
+
+ arfs_del_rules(fs);
+
+ return arfs_disable(fs);
+}
+
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ struct mlx5_flow_destination dest = {};
+ int err, i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ dest.ft = arfs->arfs_tables[i].ft.t;
+ /* Modify ttc rules destination to point on the aRFS FTs */
+ err = mlx5_ttc_fwd_dest(ttc, arfs_get_tt(i), &dest);
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
+ __func__, arfs_get_tt(i), err);
+ arfs_disable(fs);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void arfs_destroy_table(struct arfs_table *arfs_t)
+{
+ mlx5_del_flow_rules(arfs_t->default_rule);
+ mlx5e_destroy_flow_table(&arfs_t->ft);
+}
+
+static void _mlx5e_cleanup_tables(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ int i;
+
+ arfs_del_rules(fs);
+ destroy_workqueue(arfs->wq);
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ if (!IS_ERR_OR_NULL(arfs->arfs_tables[i].ft.t))
+ arfs_destroy_table(&arfs->arfs_tables[i]);
+ }
+}
+
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+
+ if (!ntuple)
+ return;
+
+ _mlx5e_cleanup_tables(fs);
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
+}
+
+static int arfs_add_default_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ enum arfs_type type)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ struct arfs_table *arfs_t = &arfs->arfs_tables[type];
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ enum mlx5_traffic_types tt;
+ int err = 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ tt = arfs_get_tt(type);
+ if (tt == -EINVAL) {
+ fs_err(fs, "%s: bad arfs_type: %d\n", __func__, type);
+ return -EINVAL;
+ }
+
+ /* FIXME: Must use mlx5_ttc_get_default_dest(),
+ * but can't since TTC default is not set up yet!
+ */
+ dest.tir_num = mlx5e_rx_res_get_tirn_rss(rx_res, tt);
+ arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
+ &flow_act,
+ &dest, 1);
+ if (IS_ERR(arfs_t->default_rule)) {
+ err = PTR_ERR(arfs_t->default_rule);
+ arfs_t->default_rule = NULL;
+ fs_err(fs, "%s: add rule failed, arfs type=%d\n", __func__, type);
+ }
+
+ return err;
+}
+
+#define MLX5E_ARFS_NUM_GROUPS 2
+#define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1)
+#define MLX5E_ARFS_GROUP2_SIZE BIT(0)
+#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\
+ MLX5E_ARFS_GROUP2_SIZE)
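+/* Group 1 holds the per-flow aRFS rules; group 2 holds the single default (miss) rule */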
+static int arfs_create_groups(struct mlx5e_flow_table *ft,
+ enum arfs_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
+ sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free_g;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
+ outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ case ARFS_IPV6_TCP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
+ break;
+ case ARFS_IPV4_UDP:
+ case ARFS_IPV6_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
+ break;
+ default:
+ err = -EINVAL;
+ goto err_free_in;
+ }
+
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ case ARFS_IPV4_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ break;
+ case ARFS_IPV6_TCP:
+ case ARFS_IPV6_UDP:
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ break;
+ default:
+ err = -EINVAL;
+ goto err_free_in;
+ }
+
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ARFS_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_clean_group;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ARFS_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_clean_group;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err_clean_group:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+err_free_in:
+ kvfree(in);
+err_free_g:
+ kfree(ft->g);
+ ft->g = NULL;
+ return err;
+}
+
+static int arfs_create_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ enum arfs_type type)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE;
+ ft_attr.level = MLX5E_ARFS_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = arfs_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = arfs_add_default_rule(fs, rx_res, type);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
+{
+ struct mlx5e_arfs_tables *arfs;
+ int err = -ENOMEM;
+ int i;
+
+ if (!ntuple)
+ return 0;
+
+ arfs = kvzalloc(sizeof(*arfs), GFP_KERNEL);
+ if (!arfs)
+ return -ENOMEM;
+
+ spin_lock_init(&arfs->arfs_lock);
+ INIT_LIST_HEAD(&arfs->rules);
+ arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
+ if (!arfs->wq)
+ goto err;
+
+ mlx5e_fs_set_arfs(fs, arfs);
+
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ err = arfs_create_table(fs, rx_res, i);
+ if (err)
+ goto err_des;
+ }
+ return 0;
+
+err_des:
+ _mlx5e_cleanup_tables(fs);
+err:
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
+ return err;
+}
+
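+/* Limit on how many expired aRFS rules are reclaimed in a single pass */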
+#define MLX5E_ARFS_EXPIRY_QUOTA 60
+
+static void arfs_may_expire_flow(struct mlx5e_priv *priv)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
+ struct arfs_rule *arfs_rule;
+ struct hlist_node *htmp;
+ HLIST_HEAD(del_list);
+ int quota = 0;
+ int i;
+ int j;
+
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(arfs_rule, htmp, arfs->arfs_tables, i, j) {
+ if (!work_pending(&arfs_rule->arfs_work) &&
+ rps_may_expire_flow(priv->netdev,
+ arfs_rule->rxq, arfs_rule->flow_id,
+ arfs_rule->filter_id)) {
+ hlist_del_init(&arfs_rule->hlist);
+ hlist_add_head(&arfs_rule->hlist, &del_list);
+ if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
+ break;
+ }
+ }
+ spin_unlock_bh(&arfs->arfs_lock);
+ hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
+ if (arfs_rule->rule)
+ mlx5_del_flow_rules(arfs_rule->rule);
+ hlist_del(&arfs_rule->hlist);
+ kfree(arfs_rule);
+ }
+}
+
+static void arfs_del_rules(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ struct hlist_node *htmp;
+ struct arfs_rule *rule;
+ HLIST_HEAD(del_list);
+ int i;
+ int j;
+
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
+ hlist_del_init(&rule->hlist);
+ hlist_add_head(&rule->hlist, &del_list);
+ }
+ spin_unlock_bh(&arfs->arfs_lock);
+
+ hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
+ cancel_work_sync(&rule->arfs_work);
+ if (rule->rule)
+ mlx5_del_flow_rules(rule->rule);
+ hlist_del(&rule->hlist);
+ kfree(rule);
+ }
+}
+
+static struct hlist_head *
+arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
+ __be16 dst_port)
+{
+ unsigned long l;
+ int bucket_idx;
+
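+ /* Fold the source and destination ports into a single key before hashing */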
+ l = (__force unsigned long)src_port |
+ ((__force unsigned long)dst_port << 2);
+
+ bucket_idx = hash_long(l, ARFS_HASH_SHIFT);
+
+ return &arfs_t->rules_hash[bucket_idx];
+}
+
+static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
+ u8 ip_proto, __be16 etype)
+{
+ if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
+ return &arfs->arfs_tables[ARFS_IPV4_TCP];
+ if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
+ return &arfs->arfs_tables[ARFS_IPV4_UDP];
+ if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
+ return &arfs->arfs_tables[ARFS_IPV6_TCP];
+ if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
+ return &arfs->arfs_tables[ARFS_IPV6_UDP];
+
+ return NULL;
+}
+
+static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
+ struct arfs_rule *arfs_rule)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
+ struct arfs_tuple *tuple = &arfs_rule->tuple;
+ struct mlx5_flow_handle *rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct arfs_table *arfs_table;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
+ ntohs(tuple->etype));
+ arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
+ if (!arfs_table) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ ft = arfs_table->ft.t;
+ if (tuple->ip_proto == IPPROTO_TCP) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
+ ntohs(tuple->dst_port));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
+ ntohs(tuple->src_port));
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.udp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.udp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
+ ntohs(tuple->dst_port));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
+ ntohs(tuple->src_port));
+ }
+ if (tuple->etype == htons(ETH_P_IP)) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &tuple->src_ipv4,
+ 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &tuple->dst_ipv4,
+ 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &tuple->src_ipv6,
+ 16);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &tuple->dst_ipv6,
+ 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff,
+ 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff,
+ 16);
+ }
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++;
+ mlx5e_dbg(HW, priv,
+ "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
+ __func__, arfs_rule->filter_id, arfs_rule->rxq,
+ tuple->ip_proto, err);
+ }
+
+out:
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule, u16 rxq)
+{
+ struct mlx5_flow_destination dst = {};
+ int err = 0;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq);
+ err = mlx5_modify_rule_destination(rule, &dst, NULL);
+ if (err)
+ netdev_warn(priv->netdev,
+ "Failed to modify aRFS rule destination to rq=%d\n", rxq);
+}
+
+static void arfs_handle_work(struct work_struct *work)
+{
+ struct arfs_rule *arfs_rule = container_of(work,
+ struct arfs_rule,
+ arfs_work);
+ struct mlx5e_priv *priv = arfs_rule->priv;
+ struct mlx5e_arfs_tables *arfs;
+ struct mlx5_flow_handle *rule;
+
+ arfs = mlx5e_fs_get_arfs(priv->fs);
+ mutex_lock(&priv->state_lock);
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ spin_lock_bh(&arfs->arfs_lock);
+ hlist_del(&arfs_rule->hlist);
+ spin_unlock_bh(&arfs->arfs_lock);
+
+ mutex_unlock(&priv->state_lock);
+ kfree(arfs_rule);
+ goto out;
+ }
+ mutex_unlock(&priv->state_lock);
+
+ if (!arfs_rule->rule) {
+ rule = arfs_add_rule(priv, arfs_rule);
+ if (IS_ERR(rule))
+ goto out;
+ arfs_rule->rule = rule;
+ } else {
+ arfs_modify_rule_rq(priv, arfs_rule->rule,
+ arfs_rule->rxq);
+ }
+out:
+ arfs_may_expire_flow(priv);
+}
+
+static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
+ struct arfs_table *arfs_t,
+ const struct flow_keys *fk,
+ u16 rxq, u32 flow_id)
+{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
+ struct arfs_rule *rule;
+ struct arfs_tuple *tuple;
+
+ rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
+ if (!rule)
+ return NULL;
+
+ rule->priv = priv;
+ rule->rxq = rxq;
+ INIT_WORK(&rule->arfs_work, arfs_handle_work);
+
+ tuple = &rule->tuple;
+ tuple->etype = fk->basic.n_proto;
+ tuple->ip_proto = fk->basic.ip_proto;
+ if (tuple->etype == htons(ETH_P_IP)) {
+ tuple->src_ipv4 = fk->addrs.v4addrs.src;
+ tuple->dst_ipv4 = fk->addrs.v4addrs.dst;
+ } else {
+ memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr));
+ memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ }
+ tuple->src_port = fk->ports.src;
+ tuple->dst_port = fk->ports.dst;
+
+ rule->flow_id = flow_id;
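+ /* Wrap before RPS_NO_FILTER so a valid filter id never equals the sentinel */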
+ rule->filter_id = arfs->last_filter_id++ % RPS_NO_FILTER;
+
+ hlist_add_head(&rule->hlist,
+ arfs_hash_bucket(arfs_t, tuple->src_port,
+ tuple->dst_port));
+ return rule;
+}
+
+static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk)
+{
+ if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst)
+ return false;
+ if (tuple->etype != fk->basic.n_proto)
+ return false;
+ if (tuple->etype == htons(ETH_P_IP))
+ return tuple->src_ipv4 == fk->addrs.v4addrs.src &&
+ tuple->dst_ipv4 == fk->addrs.v4addrs.dst;
+ if (tuple->etype == htons(ETH_P_IPV6))
+ return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr)) &&
+ !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ return false;
+}
+
+static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
+ const struct flow_keys *fk)
+{
+ struct arfs_rule *arfs_rule;
+ struct hlist_head *head;
+
+ head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst);
+ hlist_for_each_entry(arfs_rule, head, hlist) {
+ if (arfs_cmp(&arfs_rule->tuple, fk))
+ return arfs_rule;
+ }
+
+ return NULL;
+}
+
+int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_arfs_tables *arfs;
+ struct arfs_rule *arfs_rule;
+ struct arfs_table *arfs_t;
+ struct flow_keys fk;
+
+ arfs = mlx5e_fs_get_arfs(priv->fs);
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+ return -EPROTONOSUPPORT;
+
+ if (fk.basic.n_proto != htons(ETH_P_IP) &&
+ fk.basic.n_proto != htons(ETH_P_IPV6))
+ return -EPROTONOSUPPORT;
+
+ if (skb->encapsulation)
+ return -EPROTONOSUPPORT;
+
+ arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto);
+ if (!arfs_t)
+ return -EPROTONOSUPPORT;
+
+ spin_lock_bh(&arfs->arfs_lock);
+ arfs_rule = arfs_find_rule(arfs_t, &fk);
+ if (arfs_rule) {
+ if (arfs_rule->rxq == rxq_index) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return arfs_rule->filter_id;
+ }
+ arfs_rule->rxq = rxq_index;
+ } else {
+ arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id);
+ if (!arfs_rule) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return -ENOMEM;
+ }
+ }
+ queue_work(arfs->wq, &arfs_rule->arfs_work);
+ spin_unlock_bh(&arfs->arfs_lock);
+ return arfs_rule->filter_id;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
new file mode 100644
index 000000000..03a99918a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+
+/* mlx5e global resources should be placed in this file.
+ * Global resources are common to all the netdevices created on the same nic.
+ */
+
+void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
+{
+ bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev);
+ bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+ bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read);
+
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read);
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write);
+}
+
+int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &res->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "alloc pd failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn);
+ if (err) {
+ mlx5_core_err(mdev, "alloc td failed, %d\n", err);
+ goto err_dealloc_pd;
+ }
+
+ err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey);
+ if (err) {
+ mlx5_core_err(mdev, "create mkey failed, %d\n", err);
+ goto err_dealloc_transport_domain;
+ }
+
+ err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false);
+ if (err) {
+ mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err);
+ goto err_destroy_mkey;
+ }
+
+ INIT_LIST_HEAD(&res->td.tirs_list);
+ mutex_init(&res->td.list_lock);
+
+ return 0;
+
+err_destroy_mkey:
+ mlx5_core_destroy_mkey(mdev, res->mkey);
+err_dealloc_transport_domain:
+ mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(mdev, res->pdn);
+ return err;
+}
+
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
+
+ mlx5_free_bfreg(mdev, &res->bfreg);
+ mlx5_core_destroy_mkey(mdev, res->mkey);
+ mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+ mlx5_core_dealloc_pd(mdev, res->pdn);
+ memset(res, 0, sizeof(*res));
+}
+
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+ bool enable_mc_lb)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_tir *tir;
+ u8 lb_flags = 0;
+ int err = 0;
+ u32 tirn = 0;
+ int inlen;
+ void *in;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
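+ /* Build a single self-loopback-block modify_tir command and apply it to every TIR on the device */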
+ if (enable_uc_lb)
+ lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+
+ if (enable_mc_lb)
+ lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+
+ if (lb_flags)
+ MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags);
+
+ MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+ mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock);
+ list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) {
+ tirn = tir->tirn;
+ err = mlx5_core_modify_tir(mdev, tirn, in);
+ if (err)
+ break;
+ }
+ mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
+
+ kvfree(in);
+ if (err)
+ netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
new file mode 100644
index 000000000..89de92d06
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -0,0 +1,1256 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include "en.h"
+#include "en/port.h"
+#include "en/port_buffer.h"
+
+#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
+
+#define MLX5E_100MB (100000)
+#define MLX5E_1GB (1000000)
+
+#define MLX5E_CEE_STATE_UP 1
+#define MLX5E_CEE_STATE_DOWN 0
+
+/* Max supported cable length is 1000 meters */
+#define MLX5E_MAX_CABLE_LENGTH 1000
+
+enum {
+ MLX5E_VENDOR_TC_GROUP_NUM = 7,
+ MLX5E_LOWEST_PRIO_GROUP = 0,
+};
+
+enum {
+ MLX5_DCB_CHG_RESET,
+ MLX5_DCB_NO_CHG,
+ MLX5_DCB_CHG_NO_RESET,
+};
+
+#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \
+ MLX5_CAP_QCAM_REG(mdev, qpts) && \
+ MLX5_CAP_QCAM_REG(mdev, qpdpm))
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state);
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio);
+
+/* If the dcbx mode is non-host, set the dcbx mode to host.
+ */
+static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
+ enum mlx5_dcbx_oper_mode mode)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 param[MLX5_ST_SZ_DW(dcbx_param)];
+ int err;
+
+ err = mlx5_query_port_dcbx_param(mdev, param);
+ if (err)
+ return err;
+
+ MLX5_SET(dcbx_param, param, version_admin, mode);
+ if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ MLX5_SET(dcbx_param, param, willing_admin, 1);
+
+ return mlx5_set_port_dcbx_param(mdev, param);
+}
+
+static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv)
+{
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, dcbx))
+ return 0;
+
+ if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ return 0;
+
+ err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST);
+ if (err)
+ return err;
+
+ dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
+ struct ieee_ets *ets)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 tc_group[IEEE_8021QAZ_MAX_TCS];
+ bool is_tc_group_6_exist = false;
+ bool is_zero_bw_ets_tc = false;
+ int err = 0;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
+ if (err)
+ return err;
+ }
+
+ ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
+ for (i = 0; i < ets->ets_cap; i++) {
+ err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]);
+ if (err)
+ return err;
+
+ err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]);
+ if (err)
+ return err;
+
+ if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC &&
+ tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1))
+ is_zero_bw_ets_tc = true;
+
+ if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1))
+ is_tc_group_6_exist = true;
+ }
+
+ /* Report 0% ETS TCs if they exist */
+ if (is_zero_bw_ets_tc) {
+ for (i = 0; i < ets->ets_cap; i++)
+ if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP)
+ ets->tc_tx_bw[i] = 0;
+ }
+
+ /* Update tc_tsa based on FW setting */
+ for (i = 0; i < ets->ets_cap; i++) {
+ if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC)
+ priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM &&
+ !is_tc_group_6_exist)
+ priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+ }
+ memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa));
+
+ return err;
+}
+
+static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc)
+{
+ bool any_tc_mapped_to_ets = false;
+ bool ets_zero_bw = false;
+ int strict_group;
+ int i;
+
+ for (i = 0; i <= max_tc; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+ any_tc_mapped_to_ets = true;
+ if (!ets->tc_tx_bw[i])
+ ets_zero_bw = true;
+ }
+ }
+
+ /* strict group has higher priority than ets group */
+ strict_group = MLX5E_LOWEST_PRIO_GROUP;
+ if (any_tc_mapped_to_ets)
+ strict_group++;
+ if (ets_zero_bw)
+ strict_group++;
+
+ for (i = 0; i <= max_tc; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ tc_group[i] = MLX5E_VENDOR_TC_GROUP_NUM;
+ break;
+ case IEEE_8021QAZ_TSA_STRICT:
+ tc_group[i] = strict_group++;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ tc_group[i] = MLX5E_LOWEST_PRIO_GROUP;
+ if (ets->tc_tx_bw[i] && ets_zero_bw)
+ tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1;
+ break;
+ }
+ }
+}
+
+static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
+ u8 *tc_group, int max_tc)
+{
+ int bw_for_ets_zero_bw_tc = 0;
+ int last_ets_zero_bw_tc = -1;
+ int num_ets_zero_bw = 0;
+ int i;
+
+ for (i = 0; i <= max_tc; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS &&
+ !ets->tc_tx_bw[i]) {
+ num_ets_zero_bw++;
+ last_ets_zero_bw_tc = i;
+ }
+ }
+
+ if (num_ets_zero_bw)
+ bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw;
+
+ for (i = 0; i <= max_tc; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ break;
+ case IEEE_8021QAZ_TSA_STRICT:
+ tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ tc_tx_bw[i] = ets->tc_tx_bw[i] ?
+ ets->tc_tx_bw[i] :
+ bw_for_ets_zero_bw_tc;
+ break;
+ }
+ }
+
+ /* Make sure the total bw for ets zero bw group is 100% */
+ if (last_ets_zero_bw_tc != -1)
+ tc_tx_bw[last_ets_zero_bw_tc] +=
+ MLX5E_MAX_BW_ALLOC % num_ets_zero_bw;
+}
+
+/* If any ETS TC has 0% BW:
+ * Set ETS group #1 for all ETS TCs with non-zero BW; their sum must be 100%.
+ * Set group #0 for all ETS TCs with 0% BW and split the 100% BW of that
+ * group equally between them.
+ * Report both group #0 and group #1 as ETS type.
+ * All the TCs in group #0 will be reported with 0% BW.
+ */
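+/* Worked example (illustrative, assuming MLX5E_MAX_BW_ALLOC is 100%):
+ * with max_tc = 3, tc_tsa = {ETS, ETS, ETS, STRICT} and
+ * tc_tx_bw = {0, 60, 40, -}:
+ * mlx5e_build_tc_group() sees a zero-BW ETS TC, so the strict group starts
+ * at #2 and tc_group becomes {0, 1, 1, 2};
+ * mlx5e_build_tc_tx_bw() gives the single zero-BW TC the whole 100% of
+ * group #0, so tc_tx_bw becomes {100, 60, 40, 100}.
+ * Group #1 still sums to 100%, and TC0 is later reported back with 0% BW by
+ * mlx5e_dcbnl_ieee_getets().
+ */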
+static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
+ u8 tc_group[IEEE_8021QAZ_MAX_TCS];
+ int max_tc = mlx5_max_tc(mdev);
+ int err, i;
+
+ mlx5e_build_tc_group(ets, tc_group, max_tc);
+ mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc);
+
+ err = mlx5_set_port_prio_tc(mdev, ets->prio_tc);
+ if (err)
+ return err;
+
+ err = mlx5_set_port_tc_group(mdev, tc_group);
+ if (err)
+ return err;
+
+ err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+
+ if (err)
+ return err;
+
+ memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n",
+ __func__, i, ets->prio_tc[i]);
+ mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n",
+ __func__, i, tc_tx_bw[i], tc_group[i]);
+ }
+
+ return err;
+}
+
+static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
+ struct ieee_ets *ets,
+ bool zero_sum_allowed)
+{
+ bool have_ets_tc = false;
+ int bw_sum = 0;
+ int i;
+
+ /* Validate Priority */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) {
+ netdev_err(netdev,
+ "Failed to validate ETS: priority value greater than max(%d)\n",
+ MLX5E_MAX_PRIORITY);
+ return -EINVAL;
+ }
+ }
+
+ /* Validate Bandwidth Sum */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+ have_ets_tc = true;
+ bw_sum += ets->tc_tx_bw[i];
+ }
+ }
+
+ if (have_ets_tc && bw_sum != 100) {
+ if (bw_sum || (!bw_sum && !zero_sum_allowed))
+ netdev_err(netdev,
+ "Failed to validate ETS: BW sum is illegal\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
+ struct ieee_ets *ets)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return -EOPNOTSUPP;
+
+ err = mlx5e_dbcnl_validate_ets(netdev, ets, false);
+ if (err)
+ return err;
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, ets);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ int i;
+
+ pfc->pfc_cap = mlx5_max_tc(mdev) + 1;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause);
+ pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause);
+ }
+
+ if (MLX5_BUFFER_SUPPORTED(mdev))
+ pfc->delay = priv->dcbx.cable_len;
+
+ return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL);
+}
+
+static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 old_cable_len = priv->dcbx.cable_len;
+ struct ieee_pfc pfc_new;
+ u32 changed = 0;
+ u8 curr_pfc_en;
+ int ret = 0;
+
+ /* pfc_en */
+ mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL);
+ if (pfc->pfc_en != curr_pfc_en) {
+ ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
+ if (ret)
+ return ret;
+ mlx5_toggle_port_link(mdev);
+ changed |= MLX5E_PORT_BUFFER_PFC;
+ }
+
+ if (pfc->delay &&
+ pfc->delay < MLX5E_MAX_CABLE_LENGTH &&
+ pfc->delay != priv->dcbx.cable_len) {
+ priv->dcbx.cable_len = pfc->delay;
+ changed |= MLX5E_PORT_BUFFER_CABLE_LEN;
+ }
+
+ if (MLX5_BUFFER_SUPPORTED(mdev)) {
+ pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en;
+ if (priv->dcbx.manual_buffer)
+ ret = mlx5e_port_manual_buffer_config(priv, changed,
+ dev->mtu, &pfc_new,
+ NULL, NULL);
+
+ if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN))
+ priv->dcbx.cable_len = old_cable_len;
+ }
+
+ if (!ret) {
+ mlx5e_dbg(HW, priv,
+ "%s: PFC per priority bit mask: 0x%x\n",
+ __func__, pfc->pfc_en);
+ }
+ return ret;
+}
+
+static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return priv->dcbx.cap;
+}
+
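+/* For example, mode = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE |
+ * DCB_CAP_DCBX_VER_CEE keeps DCBX host-managed, while mode == 0 (when the
+ * dcbx capability is present) hands control back to the firmware;
+ * DCB_CAP_DCBX_LLD_MANAGED is never accepted.
+ */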
+static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+ if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+ return 1;
+
+ if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
+ if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
+ return 0;
+
+ /* set dcbx to fw controlled */
+ if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) {
+ dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+ dcbx->cap &= ~DCB_CAP_DCBX_HOST;
+ return 0;
+ }
+
+ return 1;
+ }
+
+ if (!(mode & DCB_CAP_DCBX_HOST))
+ return 1;
+
+ if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+ return 1;
+
+ dcbx->cap = mode;
+
+ return 0;
+}
+
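+/* Illustrative flow: installing the first DSCP->priority mapping (say
+ * dscp 24 -> prio 3) first switches the port to DSCP trust via
+ * mlx5e_set_trust_state(), then programs the mapping in firmware and
+ * registers the dcb_app entry; removing the last mapping in
+ * mlx5e_dcbnl_ieee_delapp() switches trust back to PCP.
+ */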
+static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct dcb_app temp;
+ bool is_new;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
+ !MLX5_DSCP_SUPPORTED(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
+ (app->protocol >= MLX5E_MAX_DSCP))
+ return -EINVAL;
+
+ /* Save the old entry info */
+ temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+ temp.protocol = app->protocol;
+ temp.priority = priv->dcbx_dp.dscp2prio[app->protocol];
+
+ /* Check if we need to switch to the DSCP trust state */
+ if (!priv->dcbx.dscp_app_cnt) {
+ err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP);
+ if (err)
+ return err;
+ }
+
+ /* Skip the fw command if the new and old mappings are the same */
+ if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) {
+ err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority);
+ if (err)
+ goto fw_err;
+ }
+
+ /* Delete the old entry if it exists */
+ is_new = false;
+ err = dcb_ieee_delapp(dev, &temp);
+ if (err)
+ is_new = true;
+
+ /* Add new entry and update counter */
+ err = dcb_ieee_setapp(dev, app);
+ if (err)
+ return err;
+
+ if (is_new)
+ priv->dcbx.dscp_app_cnt++;
+
+ return err;
+
+fw_err:
+ mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+ return err;
+}
+
+static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
+ !MLX5_DSCP_SUPPORTED(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
+ (app->protocol >= MLX5E_MAX_DSCP))
+ return -EINVAL;
+
+ /* Skip if no dscp app entry */
+ if (!priv->dcbx.dscp_app_cnt)
+ return -ENOENT;
+
+ /* Check if the entry matches fw setting */
+ if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol])
+ return -ENOENT;
+
+ /* Delete the app entry */
+ err = dcb_ieee_delapp(dev, app);
+ if (err)
+ return err;
+
+ /* Reset the priority mapping back to zero */
+ err = mlx5e_set_dscp2prio(priv, app->protocol, 0);
+ if (err)
+ goto fw_err;
+
+ priv->dcbx.dscp_app_cnt--;
+
+ /* Check if we need to switch to the PCP trust state */
+ if (!priv->dcbx.dscp_app_cnt)
+ err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+
+ return err;
+
+fw_err:
+ mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+ return err;
+}
+
+static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ int err;
+ int i;
+
+ err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
+ if (err)
+ return err;
+
+ memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate));
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ switch (max_bw_unit[i]) {
+ case MLX5_100_MBPS_UNIT:
+ maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB;
+ break;
+ case MLX5_GBPS_UNIT:
+ maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB;
+ break;
+ case MLX5_BW_NO_LIMIT:
+ break;
+ default:
+ WARN(true, "non-supported BW unit");
+ break;
+ }
+ }
+
+ return 0;
+}
+
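+/* Unit selection sketch (assuming MLX5E_100MB and MLX5E_1GB are the 100 Mbps
+ * and 1 Gbps scaling factors defined earlier in this file): a tc_maxrate of
+ * 25 * MLX5E_100MB is below upper_limit_mbps and gets programmed as 25 in
+ * 100 Mbps units, while 30 * MLX5E_1GB gets programmed as 30 in Gbps units;
+ * a tc_maxrate of 0 means no limit (MLX5_BW_NO_LIMIT).
+ */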
+static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB);
+ int i;
+
+ memset(max_bw_value, 0, sizeof(max_bw_value));
+ memset(max_bw_unit, 0, sizeof(max_bw_unit));
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ if (!maxrate->tc_maxrate[i]) {
+ max_bw_unit[i] = MLX5_BW_NO_LIMIT;
+ continue;
+ }
+ if (maxrate->tc_maxrate[i] < upper_limit_mbps) {
+ max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
+ MLX5E_100MB);
+ max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1;
+ max_bw_unit[i] = MLX5_100_MBPS_UNIT;
+ } else {
+ max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
+ MLX5E_1GB);
+ max_bw_unit[i] = MLX5_GBPS_UNIT;
+ }
+ }
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n",
+ __func__, i, max_bw_value[i]);
+ }
+
+ return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
+}
+
+static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct ieee_ets ets;
+ struct ieee_pfc pfc;
+ int err = -EOPNOTSUPP;
+ int i;
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ goto out;
+
+ memset(&ets, 0, sizeof(ets));
+ memset(&pfc, 0, sizeof(pfc));
+
+ ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+ for (i = 0; i < CEE_DCBX_MAX_PGS; i++) {
+ ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i];
+ ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
+ ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i];
+ mlx5e_dbg(HW, priv,
+ "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n",
+ __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i],
+ ets.prio_tc[i]);
+ }
+
+ err = mlx5e_dbcnl_validate_ets(netdev, &ets, true);
+ if (err)
+ goto out;
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+ if (err) {
+ netdev_err(netdev,
+ "%s, Failed to set ETS: %d\n", __func__, err);
+ goto out;
+ }
+
+ /* Set PFC */
+ pfc.pfc_cap = mlx5_max_tc(mdev) + 1;
+ if (!cee_cfg->pfc_enable)
+ pfc.pfc_en = 0;
+ else
+ for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+ pfc.pfc_en |= cee_cfg->pfc_setting[i] << i;
+
+ err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc);
+ if (err) {
+ netdev_err(netdev,
+ "%s, Failed to set PFC: %d\n", __func__, err);
+ goto out;
+ }
+out:
+ return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET;
+}
+
+static u8 mlx5e_dcbnl_getstate(struct net_device *netdev)
+{
+ return MLX5E_CEE_STATE_UP;
+}
+
+static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
+ u8 *perm_addr)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (!perm_addr)
+ return;
+
+ memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+ mlx5_query_mac_address(priv->mdev, perm_addr);
+}
+
+static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
+ int priority, u8 prio_type,
+ u8 pgid, u8 bw_pct, u8 up_map)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ cee_cfg->prio_to_pg_map[priority] = pgid;
+}
+
+static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev,
+ int pgid, u8 bw_pct)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ cee_cfg->pg_bw_pct[pgid] = bw_pct;
+}
+
+static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
+ int priority, u8 *prio_type,
+ u8 *pgid, u8 *bw_pct, u8 *up_map)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets)) {
+ netdev_err(netdev, "%s, ets is not supported\n", __func__);
+ return;
+ }
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ *prio_type = 0;
+ *bw_pct = 0;
+ *up_map = 0;
+
+ if (mlx5_query_port_prio_tc(mdev, priority, pgid))
+ *pgid = 0;
+}
+
+static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
+ int pgid, u8 *bw_pct)
+{
+ struct ieee_ets ets;
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ mlx5e_dcbnl_ieee_getets(netdev, &ets);
+ *bw_pct = ets.tc_tx_bw[pgid];
+}
+
+static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev,
+ int priority, u8 setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (setting > 1)
+ return;
+
+ cee_cfg->pfc_setting[priority] = setting;
+}
+
+static int
+mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev,
+ int priority, u8 *setting)
+{
+ struct ieee_pfc pfc;
+ int err;
+
+ err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc);
+
+ if (err)
+ *setting = 0;
+ else
+ *setting = (pfc.pfc_en >> priority) & 0x01;
+
+ return err;
+}
+
+static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev,
+ int priority, u8 *setting)
+{
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (!setting)
+ return;
+
+ mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting);
+}
+
+static u8 mlx5e_dcbnl_getcap(struct net_device *netdev,
+ int capid, u8 *cap)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 rval = 0;
+
+ switch (capid) {
+ case DCB_CAP_ATTR_PG:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_PFC:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_UP2TC:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_PG_TCS:
+ *cap = 1 << mlx5_max_tc(mdev);
+ break;
+ case DCB_CAP_ATTR_PFC_TCS:
+ *cap = 1 << mlx5_max_tc(mdev);
+ break;
+ case DCB_CAP_ATTR_GSP:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_BCN:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_DCBX:
+ *cap = priv->dcbx.cap |
+ DCB_CAP_DCBX_VER_CEE |
+ DCB_CAP_DCBX_VER_IEEE;
+ break;
+ default:
+ *cap = 0;
+ rval = 1;
+ break;
+ }
+
+ return rval;
+}
+
+static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev,
+ int tcs_id, u8 *num)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ switch (tcs_id) {
+ case DCB_NUMTCS_ATTR_PG:
+ case DCB_NUMTCS_ATTR_PFC:
+ *num = mlx5_max_tc(mdev) + 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev)
+{
+ struct ieee_pfc pfc;
+
+ if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc))
+ return MLX5E_CEE_STATE_DOWN;
+
+ return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN;
+}
+
+static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN))
+ return;
+
+ cee_cfg->pfc_enable = state;
+}
+
+static int mlx5e_dcbnl_getbuffer(struct net_device *dev,
+ struct dcbnl_buffer *dcb_buffer)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_port_buffer port_buffer;
+ u8 buffer[MLX5E_MAX_PRIORITY];
+ int i, err;
+
+ if (!MLX5_BUFFER_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ err = mlx5e_port_query_priority2buffer(mdev, buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+ dcb_buffer->prio2buffer[i] = buffer[i];
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+ dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size;
+ dcb_buffer->total_size = port_buffer.port_buffer_size;
+
+ return 0;
+}
+
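+/* Example (illustrative): only fields that differ from the current firmware
+ * state are forwarded, e.g. changing just buffer_size[0] sets
+ * MLX5E_PORT_BUFFER_SIZE while prio2buffer stays NULL. Any change request
+ * also marks the configuration as manual, so a later PFC change in
+ * mlx5e_dcbnl_ieee_setpfc() goes through the manual buffer path as well.
+ */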
+static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
+ struct dcbnl_buffer *dcb_buffer)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_port_buffer port_buffer;
+ u8 old_prio2buffer[MLX5E_MAX_PRIORITY];
+ u32 *buffer_size = NULL;
+ u8 *prio2buffer = NULL;
+ u32 changed = 0;
+ int i, err;
+
+ if (!MLX5_BUFFER_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < DCBX_MAX_BUFFERS; i++)
+ mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]);
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+ mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]);
+
+ err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++) {
+ if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) {
+ changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER;
+ prio2buffer = dcb_buffer->prio2buffer;
+ break;
+ }
+ }
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) {
+ changed |= MLX5E_PORT_BUFFER_SIZE;
+ buffer_size = dcb_buffer->buffer_size;
+ break;
+ }
+ }
+
+ if (!changed)
+ return 0;
+
+ priv->dcbx.manual_buffer = true;
+ err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL,
+ buffer_size, prio2buffer);
+ return err;
+}
+
+static const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
+ .ieee_getets = mlx5e_dcbnl_ieee_getets,
+ .ieee_setets = mlx5e_dcbnl_ieee_setets,
+ .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate,
+ .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate,
+ .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc,
+ .ieee_setapp = mlx5e_dcbnl_ieee_setapp,
+ .ieee_delapp = mlx5e_dcbnl_ieee_delapp,
+ .getdcbx = mlx5e_dcbnl_getdcbx,
+ .setdcbx = mlx5e_dcbnl_setdcbx,
+ .dcbnl_getbuffer = mlx5e_dcbnl_getbuffer,
+ .dcbnl_setbuffer = mlx5e_dcbnl_setbuffer,
+
+/* CEE interfaces */
+ .setall = mlx5e_dcbnl_setall,
+ .getstate = mlx5e_dcbnl_getstate,
+ .getpermhwaddr = mlx5e_dcbnl_getpermhwaddr,
+
+ .setpgtccfgtx = mlx5e_dcbnl_setpgtccfgtx,
+ .setpgbwgcfgtx = mlx5e_dcbnl_setpgbwgcfgtx,
+ .getpgtccfgtx = mlx5e_dcbnl_getpgtccfgtx,
+ .getpgbwgcfgtx = mlx5e_dcbnl_getpgbwgcfgtx,
+
+ .setpfccfg = mlx5e_dcbnl_setpfccfg,
+ .getpfccfg = mlx5e_dcbnl_getpfccfg,
+ .getcap = mlx5e_dcbnl_getcap,
+ .getnumtcs = mlx5e_dcbnl_getnumtcs,
+ .getpfcstate = mlx5e_dcbnl_getpfcstate,
+ .setpfcstate = mlx5e_dcbnl_setpfcstate,
+};
+
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+}
+
+static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
+ enum mlx5_dcbx_oper_mode *mode)
+{
+ u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+ *mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+
+ if (!mlx5_query_port_dcbx_param(priv->mdev, out))
+ *mode = MLX5_GET(dcbx_param, out, version_oper);
+
+ /* From the driver's point of view, we only care whether the mode
+ * is host (HOST) or non-host (AUTO)
+ */
+ if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ *mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+}
+
+static void mlx5e_ets_init(struct mlx5e_priv *priv)
+{
+ struct ieee_ets ets;
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return;
+
+ memset(&ets, 0, sizeof(ets));
+ ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+ for (i = 0; i < ets.ets_cap; i++) {
+ ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+ ets.prio_tc[i] = i;
+ }
+
+ if (ets.ets_cap > 1) {
+ /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+ ets.prio_tc[0] = 1;
+ ets.prio_tc[1] = 0;
+ }
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+ if (err)
+ netdev_err(priv->netdev,
+ "%s, Failed to init ETS: %d\n", __func__, err);
+}
+
+enum {
+ INIT,
+ DELETE,
+};
+
+static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action)
+{
+ struct dcb_app temp;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+ return;
+
+ if (!MLX5_DSCP_SUPPORTED(priv->mdev))
+ return;
+
+ /* No SEL_DSCP entries exist outside the DSCP trust state */
+ if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP)
+ return;
+
+ temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+ for (i = 0; i < MLX5E_MAX_DSCP; i++) {
+ temp.protocol = i;
+ temp.priority = priv->dcbx_dp.dscp2prio[i];
+ if (action == INIT)
+ dcb_ieee_setapp(priv->netdev, &temp);
+ else
+ dcb_ieee_delapp(priv->netdev, &temp);
+ }
+
+ priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0;
+}
+
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv)
+{
+ mlx5e_dcbnl_dscp_app(priv, INIT);
+}
+
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv)
+{
+ mlx5e_dcbnl_dscp_app(priv, DELETE);
+}
+
+static void mlx5e_params_calc_trust_tx_min_inline_mode(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u8 trust_state)
+{
+ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+ if (trust_state == MLX5_QPTS_TRUST_DSCP &&
+ params->tx_min_inline_mode == MLX5_INLINE_MODE_L2)
+ params->tx_min_inline_mode = MLX5_INLINE_MODE_IP;
+}
+
+static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
+{
+ u8 *trust_state = context;
+ int err;
+
+ err = mlx5_set_trust_state(priv->mdev, *trust_state);
+ if (err)
+ return err;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state);
+
+ return 0;
+}
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
+{
+ struct mlx5e_params new_params;
+ bool reset = true;
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+ new_params = priv->channels.params;
+ mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_params,
+ trust_state);
+
+ /* Skip if tx_min_inline is the same */
+ if (new_params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode)
+ reset = false;
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_update_trust_state_hw,
+ &trust_state, reset);
+
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
+{
+ int err;
+
+ err = mlx5_set_dscp2prio(priv->mdev, dscp, prio);
+ if (err)
+ return err;
+
+ priv->dcbx_dp.dscp2prio[dscp] = prio;
+ return err;
+}
+
+static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 trust_state;
+ int err;
+
+ if (!MLX5_DSCP_SUPPORTED(mdev)) {
+ WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP);
+ return 0;
+ }
+
+ err = mlx5_query_trust_state(priv->mdev, &trust_state);
+ if (err)
+ return err;
+ WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state);
+
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) {
+ /*
+ * Align the driver state with the register state.
+ * A temporary state change is required to allow the app list to be reset.
+ */
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP;
+ mlx5e_dcbnl_delete_app(priv);
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
+ }
+
+ mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
+ priv->dcbx_dp.trust_state);
+
+ err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+#define MLX5E_BUFFER_CELL_SHIFT 7
+
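+/* If the SBCAM register is not supported or cannot be read, fall back to a
+ * default cell size of 1 << MLX5E_BUFFER_CELL_SHIFT, i.e. 128 (presumably
+ * bytes, matching the units of the cap_cell_size field reported by SBCAM).
+ */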
+static u16 mlx5e_query_port_buffers_cell_size(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(sbcam_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(sbcam_reg)] = {};
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return (1 << MLX5E_BUFFER_CELL_SHIFT);
+
+ if (mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_SBCAM, 0, 0))
+ return (1 << MLX5E_BUFFER_CELL_SHIFT);
+
+ return MLX5_GET(sbcam_reg, out, cap_cell_size);
+}
+
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
+{
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+ mlx5e_trust_initialize(priv);
+
+ if (!MLX5_CAP_GEN(priv->mdev, qos))
+ return;
+
+ if (MLX5_CAP_GEN(priv->mdev, dcbx))
+ mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode);
+
+ priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE |
+ DCB_CAP_DCBX_VER_IEEE;
+ if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ priv->dcbx.cap |= DCB_CAP_DCBX_HOST;
+
+ priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv);
+ priv->dcbx.manual_buffer = false;
+ priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN;
+
+ mlx5e_ets_init(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
new file mode 100644
index 000000000..ca9cfbf57
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/dim.h>
+#include "en.h"
+
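+/* Assumed flow, based on the generic lib/dim API: net_dim() schedules this
+ * work when it picks a new moderation profile; the work applies the profile's
+ * (usec, pkts) pair to the CQ and sets the state back to DIM_START_MEASURE to
+ * open the next measurement window.
+ */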
+static void
+mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder,
+ struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
+{
+ mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
+ dim->state = DIM_START_MEASURE;
+}
+
+void mlx5e_rx_dim_work(struct work_struct *work)
+{
+ struct dim *dim = container_of(work, struct dim, work);
+ struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
+ struct dim_cq_moder cur_moder =
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+
+ mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
+}
+
+void mlx5e_tx_dim_work(struct work_struct *work)
+{
+ struct dim *dim = container_of(work, struct dim, work);
+ struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
+ struct dim_cq_moder cur_moder =
+ net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+
+ mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
new file mode 100644
index 000000000..ceeb23f47
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -0,0 +1,2453 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/ethtool_netlink.h>
+
+#include "en.h"
+#include "en/port.h"
+#include "en/params.h"
+#include "en/ptp.h"
+#include "lib/clock.h"
+#include "en/fs_ethtool.h"
+
+void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int count;
+
+ strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+ count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+ if (count >= sizeof(drvinfo->fw_version))
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev));
+
+ strscpy(drvinfo->bus_info, dev_name(mdev->device),
+ sizeof(drvinfo->bus_info));
+}
+
+static void mlx5e_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_drvinfo(priv, drvinfo);
+}
+
+struct ptys2ethtool_config {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
+};
+
+static
+struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER];
+static
+struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER];
+
+#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...) \
+ ({ \
+ struct ptys2ethtool_config *cfg; \
+ const unsigned int modes[] = { __VA_ARGS__ }; \
+ unsigned int i, bit, idx; \
+ cfg = &ptys2##table##_ethtool_table[reg_]; \
+ bitmap_zero(cfg->supported, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ bitmap_zero(cfg->advertised, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \
+ bit = modes[i] % 64; \
+ idx = modes[i] / 64; \
+ __set_bit(bit, &cfg->supported[idx]); \
+ __set_bit(bit, &cfg->advertised[idx]); \
+ } \
+ })
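+/* For example, a link-mode bit value of 70 is stored as bit (70 % 64) = 6 of
+ * 64-bit word (70 / 64) = 1 in both the supported and advertised masks.
+ */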
+
+void mlx5e_build_ptys2ethtool_map(void)
+{
+ memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table));
+ memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table));
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, legacy,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy,
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext,
+ ETHTOOL_LINK_MODE_5000baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
+ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseER_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2,
+ ext,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext,
+ ETHTOOL_LINK_MODE_50000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseDR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext,
+ ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext,
+ ETHTOOL_LINK_MODE_100000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseDR_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseCR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_2_200GBASE_CR2_KR2, ext,
+ ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_4_400GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT);
+}
+
+static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
+ struct ptys2ethtool_config **arr,
+ u32 *size)
+{
+ bool ext = mlx5e_ptys_ext_supported(mdev);
+
+ *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
+ *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
+ ARRAY_SIZE(ptys2legacy_ethtool_table);
+}
+
+typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+
+struct pflag_desc {
+ char name[ETH_GSTRING_LEN];
+ mlx5e_pflag_handler handler;
+};
+
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS];
+
+int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return mlx5e_stats_total_num(priv);
+ case ETH_SS_PRIV_FLAGS:
+ return MLX5E_NUM_PFLAGS;
+ case ETH_SS_TEST:
+ return mlx5e_self_test_num(priv);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_get_sset_count(priv, sset);
+}
+
+void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
+{
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_PRIV_FLAGS:
+ for (i = 0; i < MLX5E_NUM_PFLAGS; i++)
+ strcpy(data + i * ETH_GSTRING_LEN,
+ mlx5e_priv_flags[i].name);
+ break;
+
+ case ETH_SS_TEST:
+ mlx5e_self_test_fill_strings(priv, data);
+ break;
+
+ case ETH_SS_STATS:
+ mlx5e_stats_fill_strings(priv, data);
+ break;
+ }
+}
+
+static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_strings(priv, stringset, data);
+}
+
+void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
+ struct ethtool_stats *stats, u64 *data)
+{
+ int idx = 0;
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_stats_update(priv);
+ mutex_unlock(&priv->state_lock);
+
+ mlx5e_stats_fill(priv, data, idx);
+}
+
+static void mlx5e_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
+}
+
+void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param)
+{
+ /* Limitation for regular RQ. XSK RQ may clamp the queue length in
+ * mlx5e_mpwqe_get_log_rq_size.
+ */
+ u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev,
+ PAGE_SHIFT,
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED);
+
+ param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ max_log_mpwrq_pkts);
+ param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
+ param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames;
+ param->tx_pending = 1 << priv->channels.params.log_sq_size;
+
+ kernel_param->tcp_data_split =
+ (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ?
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED :
+ ETHTOOL_TCP_DATA_SPLIT_DISABLED;
+}
+
+static void mlx5e_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
+}
+
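+/* Example (illustrative): `ethtool -G <dev> rx 1024 tx 2048` maps to
+ * log_rq_size = order_base_2(1024) = 10 and log_sq_size = 11; if both already
+ * match the current parameters the call is a no-op, otherwise the channels
+ * are safely switched to the new sizes.
+ */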
+int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_params new_params;
+ u8 log_rq_size;
+ u8 log_sq_size;
+ int err = 0;
+
+ if (param->rx_jumbo_pending) {
+ netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+ if (param->rx_mini_pending) {
+ netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
+ netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n",
+ __func__, param->rx_pending,
+ 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
+ return -EINVAL;
+ }
+
+ if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
+ netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n",
+ __func__, param->tx_pending,
+ 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ return -EINVAL;
+ }
+
+ log_rq_size = order_base_2(param->rx_pending);
+ log_sq_size = order_base_2(param->tx_pending);
+
+ if (log_rq_size == priv->channels.params.log_rq_mtu_frames &&
+ log_sq_size == priv->channels.params.log_sq_size)
+ return 0;
+
+ mutex_lock(&priv->state_lock);
+
+ new_params = priv->channels.params;
+ new_params.log_rq_mtu_frames = log_rq_size;
+ new_params.log_sq_size = log_sq_size;
+
+ err = mlx5e_validate_params(priv->mdev, &new_params);
+ if (err)
+ goto unlock;
+
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch)
+{
+ mutex_lock(&priv->state_lock);
+ ch->max_combined = priv->max_nch;
+ ch->combined_count = priv->channels.params.num_channels;
+ mutex_unlock(&priv->state_lock);
+}
+
+static void mlx5e_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_params *cur_params = &priv->channels.params;
+ unsigned int count = ch->combined_count;
+ struct mlx5e_params new_params;
+ bool arfs_enabled;
+ int rss_cnt;
+ bool opened;
+ int err = 0;
+
+ if (!count) {
+ netdev_info(priv->netdev, "%s: combined_count=0 not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (cur_params->num_channels == count)
+ return 0;
+
+ mutex_lock(&priv->state_lock);
+
+ /* Don't allow changing the number of channels if HTB offload is active,
+ * because the numbering of the QoS SQs will change while per-queue
+ * qdiscs are attached.
+ */
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n",
+ __func__);
+ goto out;
+ }
+
+ /* Don't allow changing the number of channels if non-default RSS contexts exist,
+ * the kernel doesn't protect against set_channels operations that break them.
+ */
+ rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1;
+ if (rss_cnt) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n",
+ __func__, rss_cnt);
+ goto out;
+ }
+
+ /* Don't allow changing the number of channels if MQPRIO mode channel offload is active,
+ * because it defines a partition over the channels queues.
+ */
+ if (cur_params->mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: MQPRIO mode channel offload is active, cannot change the number of channels\n",
+ __func__);
+ goto out;
+ }
+
+ new_params = *cur_params;
+ new_params.num_channels = count;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE);
+ if (arfs_enabled)
+ mlx5e_arfs_disable(priv->fs);
+
+ /* Switch to new channels, set new parameters and close old ones */
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_num_channels_changed_ctx, NULL, true);
+
+ if (arfs_enabled) {
+ int err2 = mlx5e_arfs_enable(priv->fs);
+
+ if (err2)
+ netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
+ __func__, err2);
+ }
+
+out:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_channels(priv, ch);
+}
+
+int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal)
+{
+ struct dim_cq_moder *rx_moder, *tx_moder;
+
+ if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
+ return -EOPNOTSUPP;
+
+ rx_moder = &priv->channels.params.rx_cq_moderation;
+ coal->rx_coalesce_usecs = rx_moder->usec;
+ coal->rx_max_coalesced_frames = rx_moder->pkts;
+ coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled;
+
+ tx_moder = &priv->channels.params.tx_cq_moderation;
+ coal->tx_coalesce_usecs = tx_moder->usec;
+ coal->tx_max_coalesced_frames = tx_moder->pkts;
+ coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled;
+
+ kernel_coal->use_cqe_mode_rx =
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER);
+ kernel_coal->use_cqe_mode_tx =
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER);
+
+ return 0;
+}
+
+static int mlx5e_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
+}
+
+#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD
+#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT
+
+static void
+mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int tc;
+ int i;
+
+ for (i = 0; i < priv->channels.num; ++i) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ mlx5_core_modify_cq_moderation(mdev,
+ &c->sq[tc].cq.mcq,
+ coal->tx_coalesce_usecs,
+ coal->tx_max_coalesced_frames);
+ }
+ }
+}
+
+static void
+mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int i;
+
+ for (i = 0; i < priv->channels.num; ++i) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
+ coal->rx_coalesce_usecs,
+ coal->rx_max_coalesced_frames);
+ }
+}
+
+/* Convert a boolean cq_mode value to the mlx5 CQ period mode:
+ * true  : MLX5_CQ_PERIOD_MODE_START_FROM_CQE
+ * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE
+ */
+static int cqe_mode_to_period_mode(bool val)
+{
+ return val ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
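+/* Example (illustrative): `ethtool -C <dev> adaptive-rx off rx-usecs 8
+ * rx-frames 32` with channels open and the adaptive (DIM) state unchanged
+ * only updates the CQ moderation of the live channels on the fly; toggling
+ * adaptive-rx/tx or the CQE-based moderation mode forces a channel reset
+ * instead.
+ */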
+int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct dim_cq_moder *rx_moder, *tx_moder;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params new_params;
+ bool reset_rx, reset_tx;
+ bool reset = true;
+ u8 cq_period_mode;
+ int err = 0;
+
+ if (!MLX5_CAP_GEN(mdev, cq_moderation))
+ return -EOPNOTSUPP;
+
+ if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) {
+ netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n",
+ __func__, MLX5E_MAX_COAL_TIME);
+ return -ERANGE;
+ }
+
+ if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES ||
+ coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) {
+ netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n",
+ __func__, MLX5E_MAX_COAL_FRAMES);
+ return -ERANGE;
+ }
+
+ if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) &&
+ !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) {
+ NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device");
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&priv->state_lock);
+ new_params = priv->channels.params;
+
+ rx_moder = &new_params.rx_cq_moderation;
+ rx_moder->usec = coal->rx_coalesce_usecs;
+ rx_moder->pkts = coal->rx_max_coalesced_frames;
+ new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+
+ tx_moder = &new_params.tx_cq_moderation;
+ tx_moder->usec = coal->tx_coalesce_usecs;
+ tx_moder->pkts = coal->tx_max_coalesced_frames;
+ new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
+
+ reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
+ reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled;
+
+ cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx);
+ if (cq_period_mode != rx_moder->cq_period_mode) {
+ mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode);
+ reset_rx = true;
+ }
+
+ cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx);
+ if (cq_period_mode != tx_moder->cq_period_mode) {
+ mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode);
+ reset_tx = true;
+ }
+
+ if (reset_rx) {
+ u8 mode = MLX5E_GET_PFLAG(&new_params,
+ MLX5E_PFLAG_RX_CQE_BASED_MODER);
+
+ mlx5e_reset_rx_moderation(&new_params, mode);
+ }
+ if (reset_tx) {
+ u8 mode = MLX5E_GET_PFLAG(&new_params,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER);
+
+ mlx5e_reset_tx_moderation(&new_params, mode);
+ }
+
+ /* If DIM state hasn't changed, it's possible to modify interrupt
+ * moderation parameters on the fly, even if the channels are open.
+ */
+ if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ if (!coal->use_adaptive_rx_coalesce)
+ mlx5e_set_priv_channels_rx_coalesce(priv, coal);
+ if (!coal->use_adaptive_tx_coalesce)
+ mlx5e_set_priv_channels_tx_coalesce(priv, coal);
+ reset = false;
+ }
+
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
+
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
+}
+
+static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev,
+ unsigned long *supported_modes,
+ u32 eth_proto_cap)
+{
+ unsigned long proto_cap = eth_proto_cap;
+ struct ptys2ethtool_config *table;
+ u32 max_size;
+ int proto;
+
+ mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size);
+ for_each_set_bit(proto, &proto_cap, max_size)
+ bitmap_or(supported_modes, supported_modes,
+ table[proto].supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
+ u32 eth_proto_cap, bool ext)
+{
+ unsigned long proto_cap = eth_proto_cap;
+ struct ptys2ethtool_config *table;
+ u32 max_size;
+ int proto;
+
+ table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
+ max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
+ ARRAY_SIZE(ptys2legacy_ethtool_table);
+
+ for_each_set_bit(proto, &proto_cap, max_size)
+ bitmap_or(advertising_modes, advertising_modes,
+ table[proto].advertised,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static const u32 pplm_fec_2_ethtool[] = {
+ [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF,
+ [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER,
+ [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
+ [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS,
+ [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS,
+};
+
+static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
+{
+ int mode = 0;
+
+ if (!fec_mode)
+ return ETHTOOL_FEC_AUTO;
+
+ mode = find_first_bit(&fec_mode, size);
+
+ if (mode < ARRAY_SIZE(pplm_fec_2_ethtool))
+ return pplm_fec_2_ethtool[mode];
+
+ return 0;
+}
+
+#define MLX5E_ADVERTISE_SUPPORTED_FEC(mlx5_fec, ethtool_fec) \
+ do { \
+ if (mlx5e_fec_in_caps(dev, 1 << (mlx5_fec))) \
+ __set_bit(ethtool_fec, \
+ link_ksettings->link_modes.supported);\
+ } while (0)
+
+static const u32 pplm_fec_2_ethtool_linkmodes[] = {
+ [MLX5E_FEC_NOFEC] = ETHTOOL_LINK_MODE_FEC_NONE_BIT,
+ [MLX5E_FEC_FIRECODE] = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
+ [MLX5E_FEC_RS_528_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT,
+ [MLX5E_FEC_RS_544_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT,
+ [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_LINK_MODE_FEC_LLRS_BIT,
+};
+
+static int get_fec_supported_advertised(struct mlx5_core_dev *dev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long active_fec_long;
+ u32 active_fec;
+ u32 bitn;
+ int err;
+
+ err = mlx5e_get_fec_mode(dev, &active_fec, NULL);
+ if (err)
+ return (err == -EOPNOTSUPP) ? 0 : err;
+
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_NOFEC,
+ ETHTOOL_LINK_MODE_FEC_NONE_BIT);
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_FIRECODE,
+ ETHTOOL_LINK_MODE_FEC_BASER_BIT);
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_RS_528_514,
+ ETHTOOL_LINK_MODE_FEC_RS_BIT);
+ MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_LLRS_272_257_1,
+ ETHTOOL_LINK_MODE_FEC_LLRS_BIT);
+
+ active_fec_long = active_fec;
+ /* active_fec is a bitmask; find out which bit is set and
+ * advertise the corresponding ethtool bit
+ */
+ bitn = find_first_bit(&active_fec_long, sizeof(active_fec_long) * BITS_PER_BYTE);
+ if (bitn < ARRAY_SIZE(pplm_fec_2_ethtool_linkmodes))
+ __set_bit(pplm_fec_2_ethtool_linkmodes[bitn],
+ link_ksettings->link_modes.advertising);
+
+ return 0;
+}
+
+static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev,
+ struct ethtool_link_ksettings *link_ksettings,
+ u32 eth_proto_cap, u8 connector_type)
+{
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) {
+ if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4)
+ | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4)
+ | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported,
+ FIBRE);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ FIBRE);
+ }
+
+ if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_KR)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4)
+ | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported,
+ Backplane);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Backplane);
+ }
+ return;
+ }
+
+ switch (connector_type) {
+ case MLX5E_PORT_TP:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, TP);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, TP);
+ break;
+ case MLX5E_PORT_AUI:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, AUI);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, AUI);
+ break;
+ case MLX5E_PORT_BNC:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, BNC);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, BNC);
+ break;
+ case MLX5E_PORT_MII:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, MII);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, MII);
+ break;
+ case MLX5E_PORT_FIBRE:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, FIBRE);
+ break;
+ case MLX5E_PORT_DA:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, Backplane);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Backplane);
+ break;
+ case MLX5E_PORT_NONE:
+ case MLX5E_PORT_OTHER:
+ default:
+ break;
+ }
+}
+
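+/* Resolve the ethtool speed/duplex.  If the operational protocol cannot be
+ * mapped to a known speed, fall back to the PTYS data_rate_oper field, which
+ * is treated as a count of 100 Mb/s units (hence the x100 scaling below);
+ * e.g. a value of 250 is reported as 25000 Mb/s.
+ */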
+static void get_speed_duplex(struct net_device *netdev,
+ u32 eth_proto_oper, bool force_legacy,
+ u16 data_rate_oper,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ u32 speed = SPEED_UNKNOWN;
+ u8 duplex = DUPLEX_UNKNOWN;
+
+ if (!netif_carrier_ok(netdev))
+ goto out;
+
+ speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
+ if (!speed) {
+ if (data_rate_oper)
+ speed = 100 * data_rate_oper;
+ else
+ speed = SPEED_UNKNOWN;
+ goto out;
+ }
+
+ duplex = DUPLEX_FULL;
+
+out:
+ link_ksettings->base.speed = speed;
+ link_ksettings->base.duplex = duplex;
+}
+
+static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *supported = link_ksettings->link_modes.supported;
+
+ ptys2ethtool_supported_link(mdev, supported, eth_proto_cap);
+
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
+}
+
+static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause,
+ struct ethtool_link_ksettings *link_ksettings,
+ bool ext)
+{
+ unsigned long *advertising = link_ksettings->link_modes.advertising;
+
+ ptys2ethtool_adver_link(advertising, eth_proto_cap, ext);
+
+ if (rx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
+ if (tx_pause ^ rx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause);
+}
+
+static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = {
+ [MLX5E_PORT_UNKNOWN] = PORT_OTHER,
+ [MLX5E_PORT_NONE] = PORT_NONE,
+ [MLX5E_PORT_TP] = PORT_TP,
+ [MLX5E_PORT_AUI] = PORT_AUI,
+ [MLX5E_PORT_BNC] = PORT_BNC,
+ [MLX5E_PORT_MII] = PORT_MII,
+ [MLX5E_PORT_FIBRE] = PORT_FIBRE,
+ [MLX5E_PORT_DA] = PORT_DA,
+ [MLX5E_PORT_OTHER] = PORT_OTHER,
+};
+
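+/* Pick the ethtool PORT_* type.  When the firmware reports a connector type,
+ * it is translated via ptys2connector_type[] above; otherwise the port type
+ * is guessed from the operational protocol: optical/SGMII modes map to
+ * PORT_FIBRE, copper CR modes to PORT_DA, backplane KR/KX modes to PORT_NONE,
+ * and anything else to PORT_OTHER.
+ */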
+static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type)
+{
+ if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type))
+ return ptys2connector_type[connector_type];
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
+ return PORT_FIBRE;
+ }
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_CR) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) {
+ return PORT_DA;
+ }
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_KR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) {
+ return PORT_NONE;
+ }
+
+ return PORT_OTHER;
+}
+
+static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
+ bool ext = mlx5e_ptys_ext_supported(mdev);
+
+ ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext);
+}
+
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
+ u32 eth_proto_admin;
+ u8 an_disable_admin;
+ u16 data_rate_oper;
+ u32 eth_proto_oper;
+ u32 eth_proto_cap;
+ u8 connector_type;
+ u32 rx_pause = 0;
+ u32 tx_pause = 0;
+ u32 eth_proto_lp;
+ bool admin_ext;
+ u8 an_status;
+ bool ext;
+ int err;
+
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+ if (err) {
+ netdev_err(priv->netdev, "%s: query port ptys failed: %d\n",
+ __func__, err);
+ goto err_query_regs;
+ }
+ ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
+ eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_capability);
+ eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_admin);
+ /* Fields: eth_proto_admin and ext_eth_proto_admin are
+ * mutually exclusive. Hence try reading legacy advertising
+ * when extended advertising is zero.
+ * admin_ext indicates which proto_admin (ext vs. legacy)
+ * should be read and interpreted
+ */
+ admin_ext = ext;
+ if (ext && !eth_proto_admin) {
+ eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false,
+ eth_proto_admin);
+ admin_ext = false;
+ }
+
+ eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, admin_ext,
+ eth_proto_oper);
+ eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
+ an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+ an_status = MLX5_GET(ptys_reg, out, an_status);
+ connector_type = MLX5_GET(ptys_reg, out, connector_type);
+ data_rate_oper = MLX5_GET(ptys_reg, out, data_rate_oper);
+
+ mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ get_supported(mdev, eth_proto_cap, link_ksettings);
+ get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings,
+ admin_ext);
+ get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext,
+ data_rate_oper, link_ksettings);
+
+ eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
+ connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ?
+ connector_type : MLX5E_PORT_UNKNOWN;
+ link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type);
+ ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin,
+ connector_type);
+ get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
+
+ if (an_status == MLX5_AN_COMPLETE)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ lp_advertising, Autoneg);
+
+ link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE :
+ AUTONEG_ENABLE;
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+ Autoneg);
+
+ err = get_fec_supported_advertised(mdev, link_ksettings);
+ if (err) {
+ netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n",
+ __func__, err);
+ err = 0; /* don't fail caps query because of FEC error */
+ }
+
+ if (!an_disable_admin)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Autoneg);
+
+err_query_regs:
+ return err;
+}
+
+static int mlx5e_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
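+/* The legacy 56G link mode (56GBASE_R4) can only be used with autoneg, so
+ * forcing it with autoneg off is rejected here.  Illustrative userspace
+ * trigger (interface name is arbitrary):
+ * "ethtool -s eth0 speed 56000 autoneg off".
+ */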
+static int mlx5e_speed_validate(struct net_device *netdev, bool ext,
+ const unsigned long link_modes, u8 autoneg)
+{
+ /* Extended link-mode has no speed limitations. */
+ if (ext)
+ return 0;
+
+ if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
+ autoneg != AUTONEG_ENABLE) {
+ netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n",
+ __func__);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
+{
+ u32 i, ptys_modes = 0;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (*ptys2legacy_ethtool_table[i].advertised == 0)
+ continue;
+ if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised,
+ link_modes,
+ __ETHTOOL_LINK_MODE_MASK_NBITS))
+ ptys_modes |= MLX5E_PROT_MASK(i);
+ }
+
+ return ptys_modes;
+}
+
+static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes)
+{
+ u32 i, ptys_modes = 0;
+ unsigned long modes[2];
+
+ for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) {
+ if (ptys2ext_ethtool_table[i].advertised[0] == 0 &&
+ ptys2ext_ethtool_table[i].advertised[1] == 0)
+ continue;
+ memset(modes, 0, sizeof(modes));
+ bitmap_and(modes, ptys2ext_ethtool_table[i].advertised,
+ link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] &&
+ modes[1] == ptys2ext_ethtool_table[i].advertised[1])
+ ptys_modes |= MLX5E_PROT_MASK(i);
+ }
+ return ptys_modes;
+}
+
+static bool ext_link_mode_requested(const unsigned long *adver)
+{
+#define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT
+ int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,};
+
+ bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size);
+ return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static bool ext_requested(u8 autoneg, const unsigned long *adver, bool ext_supported)
+{
+ bool ext_link_mode = ext_link_mode_requested(adver);
+
+ return autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_supported;
+}
+
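+/* Apply a new link configuration: decide whether the extended or the legacy
+ * PTYS view is being configured (based on the requested link modes and device
+ * support), translate either the advertised modes (autoneg on) or the forced
+ * speed (autoneg off) into PTYS protocol bits, mask them against the port
+ * capabilities, and toggle the link only if the admin state actually changes.
+ */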
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_port_eth_proto eproto;
+ const unsigned long *adver;
+ bool an_changes = false;
+ u8 an_disable_admin;
+ bool ext_supported;
+ u8 an_disable_cap;
+ bool an_disable;
+ u32 link_modes;
+ u8 an_status;
+ u8 autoneg;
+ u32 speed;
+ bool ext;
+ int err;
+
+ u32 (*ethtool2ptys_adver_func)(const unsigned long *adver);
+
+ adver = link_ksettings->link_modes.advertising;
+ autoneg = link_ksettings->base.autoneg;
+ speed = link_ksettings->base.speed;
+
+ ext_supported = mlx5e_ptys_ext_supported(mdev);
+ ext = ext_requested(autoneg, adver, ext_supported);
+ if (!ext_supported && ext)
+ return -EOPNOTSUPP;
+
+ ethtool2ptys_adver_func = ext ? mlx5e_ethtool2ptys_ext_adver_link :
+ mlx5e_ethtool2ptys_adver_link;
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
+ if (err) {
+ netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
+ __func__, err);
+ goto out;
+ }
+ link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
+ mlx5e_port_speed2linkmodes(mdev, speed, !ext);
+
+ err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg);
+ if (err)
+ goto out;
+
+ link_modes = link_modes & eproto.cap;
+ if (!link_modes) {
+ netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
+ __func__);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap,
+ &an_disable_admin);
+
+ an_disable = autoneg == AUTONEG_DISABLE;
+ an_changes = ((!an_disable && an_disable_admin) ||
+ (an_disable && !an_disable_admin));
+
+ if (!an_changes && link_modes == eproto.admin)
+ goto out;
+
+ mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext);
+ mlx5_toggle_port_link(mdev);
+
+out:
+ return err;
+}
+
+static int mlx5e_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
+u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
+{
+ return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key);
+}
+
+static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_rxfh_key_size(priv);
+}
+
+u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv)
+{
+ return MLX5E_INDIR_RQT_SIZE;
+}
+
+static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_rxfh_indir_size(priv);
+}
+
+static int mlx5e_get_rxfh_context(struct net_device *dev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, indir, key, hfunc);
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_set_rxfh_context(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc,
+ u32 *rss_context, bool delete)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ if (delete) {
+ err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
+ goto unlock;
+ }
+
+ if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+ unsigned int count = priv->channels.params.num_channels;
+
+ err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
+ if (err)
+ goto unlock;
+ }
+
+ err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, indir, key,
+ hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ return mlx5e_get_rxfh_context(netdev, indir, key, hfunc, 0);
+}
+
+int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, 0, indir, key,
+ hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
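+/* PFC storm-prevention (device stall) watermarks.  The minor watermark is
+ * derived from the critical timeout as max(80 ms, 85% of critical); e.g. a
+ * critical timeout of 1000 ms yields a minor watermark of 850 ms, while the
+ * 100 ms "auto" default yields max(80, 85) = 85 ms.
+ */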
+#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100
+#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000
+#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85
+#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80
+#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
+ max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
+ (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
+
+static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
+ u16 *pfc_prevention_tout)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);
+}
+
+static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev,
+ u16 pfc_preven)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 critical_tout;
+ u16 minor;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ?
+ MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC :
+ pfc_preven;
+
+ if (critical_tout != PFC_STORM_PREVENTION_DISABLE &&
+ (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC ||
+ critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) {
+ netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n",
+ __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC,
+ MLX5E_PFC_PREVEN_TOUT_MAX_MSEC);
+ return -EINVAL;
+ }
+
+ minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout);
+ return mlx5_set_port_stall_watermark(mdev, critical_tout,
+ minor);
+}
+
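+/* The stall-prevention timeout above is plumbed through the generic ethtool
+ * tunable interface (ETHTOOL_PFC_PREVENTION_TOUT).  Illustrative userspace
+ * usage, assuming an ethtool binary that knows this tunable and an arbitrary
+ * interface name: "ethtool --set-tunable eth0 pfc-prevention-tout 200".
+ */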
+static int mlx5e_get_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ void *data)
+{
+ int err;
+
+ switch (tuna->id) {
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_get_pfc_prevention_tout(dev, data);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int mlx5e_set_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+ switch (tuna->id) {
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static void mlx5e_get_pause_stats(struct net_device *netdev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_pause_get(priv, pause_stats);
+}
+
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause,
+ &pauseparam->tx_pause);
+ if (err) {
+ netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
+ __func__, err);
+ }
+}
+
+static void mlx5e_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EOPNOTSUPP;
+
+ if (pauseparam->autoneg)
+ return -EINVAL;
+
+ err = mlx5_set_port_pause(mdev,
+ pauseparam->rx_pause ? 1 : 0,
+ pauseparam->tx_pause ? 1 : 0);
+ if (err) {
+ netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
+ __func__, err);
+ }
+
+ return err;
+}
+
+static int mlx5e_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
+int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ info->phc_index = mlx5_clock_get_ptp_index(mdev);
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
+ info->phc_index == -1)
+ return 0;
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
+static int mlx5e_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_get_ts_info(priv, info);
+}
+
+static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev)
+{
+ __u32 ret = 0;
+
+ if (MLX5_CAP_GEN(mdev, wol_g))
+ ret |= WAKE_MAGIC;
+
+ if (MLX5_CAP_GEN(mdev, wol_s))
+ ret |= WAKE_MAGICSECURE;
+
+ if (MLX5_CAP_GEN(mdev, wol_a))
+ ret |= WAKE_ARP;
+
+ if (MLX5_CAP_GEN(mdev, wol_b))
+ ret |= WAKE_BCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_m))
+ ret |= WAKE_MCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_u))
+ ret |= WAKE_UCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_p))
+ ret |= WAKE_PHY;
+
+ return ret;
+}
+
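+/* The two helpers below translate Wake-on-LAN modes between the firmware
+ * MLX5_WOL_* bits and the kernel WAKE_* flags, one per direction.  For
+ * example (interface name is arbitrary), "ethtool -s eth0 wol g" requests
+ * WAKE_MAGIC, which is programmed as MLX5_WOL_MAGIC.
+ */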
+static __u32 mlx5e_reformat_wol_mode_mlx5_to_linux(u8 mode)
+{
+ __u32 ret = 0;
+
+ if (mode & MLX5_WOL_MAGIC)
+ ret |= WAKE_MAGIC;
+
+ if (mode & MLX5_WOL_SECURED_MAGIC)
+ ret |= WAKE_MAGICSECURE;
+
+ if (mode & MLX5_WOL_ARP)
+ ret |= WAKE_ARP;
+
+ if (mode & MLX5_WOL_BROADCAST)
+ ret |= WAKE_BCAST;
+
+ if (mode & MLX5_WOL_MULTICAST)
+ ret |= WAKE_MCAST;
+
+ if (mode & MLX5_WOL_UNICAST)
+ ret |= WAKE_UCAST;
+
+ if (mode & MLX5_WOL_PHY_ACTIVITY)
+ ret |= WAKE_PHY;
+
+ return ret;
+}
+
+static u8 mlx5e_reformat_wol_mode_linux_to_mlx5(__u32 mode)
+{
+ u8 ret = 0;
+
+ if (mode & WAKE_MAGIC)
+ ret |= MLX5_WOL_MAGIC;
+
+ if (mode & WAKE_MAGICSECURE)
+ ret |= MLX5_WOL_SECURED_MAGIC;
+
+ if (mode & WAKE_ARP)
+ ret |= MLX5_WOL_ARP;
+
+ if (mode & WAKE_BCAST)
+ ret |= MLX5_WOL_BROADCAST;
+
+ if (mode & WAKE_MCAST)
+ ret |= MLX5_WOL_MULTICAST;
+
+ if (mode & WAKE_UCAST)
+ ret |= MLX5_WOL_UNICAST;
+
+ if (mode & WAKE_PHY)
+ ret |= MLX5_WOL_PHY_ACTIVITY;
+
+ return ret;
+}
+
+static void mlx5e_get_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 mlx5_wol_mode;
+ int err;
+
+ memset(wol, 0, sizeof(*wol));
+
+ wol->supported = mlx5e_get_wol_supported(mdev);
+ if (!wol->supported)
+ return;
+
+ err = mlx5_query_port_wol(mdev, &mlx5_wol_mode);
+ if (err)
+ return;
+
+ wol->wolopts = mlx5e_reformat_wol_mode_mlx5_to_linux(mlx5_wol_mode);
+}
+
+static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ __u32 wol_supported = mlx5e_get_wol_supported(mdev);
+ u32 mlx5_wol_mode;
+
+ if (!wol_supported)
+ return -EOPNOTSUPP;
+
+ if (wol->wolopts & ~wol_supported)
+ return -EINVAL;
+
+ mlx5_wol_mode = mlx5e_reformat_wol_mode_linux_to_mlx5(wol->wolopts);
+
+ return mlx5_set_port_wol(mdev, mlx5_wol_mode);
+}
+
+static void mlx5e_get_fec_stats(struct net_device *netdev,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_fec_get(priv, fec_stats);
+}
+
+static int mlx5e_get_fecparam(struct net_device *netdev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 fec_configured;
+ u32 fec_active;
+ int err;
+
+ err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured);
+
+ if (err)
+ return err;
+
+ fecparam->active_fec = pplm2ethtool_fec((unsigned long)fec_active,
+ sizeof(unsigned long) * BITS_PER_BYTE);
+
+ if (!fecparam->active_fec)
+ return -EOPNOTSUPP;
+
+ fecparam->fec = pplm2ethtool_fec((unsigned long)fec_configured,
+ sizeof(unsigned long) * BITS_PER_BYTE);
+
+ return 0;
+}
+
+static int mlx5e_set_fecparam(struct net_device *netdev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ unsigned long fec_bitmap;
+ u16 fec_policy = 0;
+ int mode;
+ int err;
+
+ bitmap_from_arr32(&fec_bitmap, &fecparam->fec, sizeof(fecparam->fec) * BITS_PER_BYTE);
+ if (bitmap_weight(&fec_bitmap, ETHTOOL_FEC_LLRS_BIT + 1) > 1)
+ return -EOPNOTSUPP;
+
+ for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) {
+ if (!(pplm_fec_2_ethtool[mode] & fecparam->fec))
+ continue;
+ fec_policy |= (1 << mode);
+ break;
+ }
+
+ err = mlx5e_set_fec_mode(mdev, fec_policy);
+
+ if (err)
+ return err;
+
+ mlx5_toggle_port_link(mdev);
+
+ return 0;
+}
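+
+/* The FEC handlers above back "ethtool --show-fec" and "ethtool --set-fec".
+ * Only a single FEC encoding may be requested at a time (enforced by the
+ * bitmap_weight() check in mlx5e_set_fecparam), e.g. (interface name is
+ * arbitrary): "ethtool --set-fec eth0 encoding rs".
+ */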
+
+static u32 mlx5e_get_msglevel(struct net_device *dev)
+{
+ return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel;
+}
+
+static void mlx5e_set_msglevel(struct net_device *dev, u32 val)
+{
+ ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val;
+}
+
+static int mlx5e_set_phys_id(struct net_device *dev,
+ enum ethtool_phys_id_state state)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 beacon_duration;
+
+ if (!MLX5_CAP_GEN(mdev, beacon_led))
+ return -EOPNOTSUPP;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ beacon_duration = MLX5_BEACON_DURATION_INF;
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ beacon_duration = MLX5_BEACON_DURATION_OFF;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return mlx5_set_port_beacon(mdev, beacon_duration);
+}
+
+static int mlx5e_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *dev = priv->mdev;
+ int size_read = 0;
+ u8 data[4] = {0};
+
+ size_read = mlx5_query_module_eeprom(dev, 0, 2, data);
+ if (size_read < 2)
+ return -EIO;
+
+ /* data[0] = identifier byte */
+ switch (data[0]) {
+ case MLX5_MODULE_ID_QSFP:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
+ break;
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+ /* data[1] = revision id */
+ if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) {
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
+ }
+ break;
+ case MLX5_MODULE_ID_SFP:
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
+ __func__, data[0]);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
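+/* Read the cable/module EEPROM (userspace "ethtool -m"; interface name is
+ * arbitrary).  mlx5_query_module_eeprom() may return fewer bytes than
+ * requested, so reading loops until the full length is copied or a read
+ * returns 0, meaning there is nothing more to read.
+ */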
+static int mlx5e_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int offset = ee->offset;
+ int size_read;
+ int i = 0;
+
+ if (!ee->len)
+ return -EINVAL;
+
+ memset(data, 0, ee->len);
+
+ while (i < ee->len) {
+ size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i,
+ data + i);
+
+ if (!size_read)
+ /* Done reading */
+ return 0;
+
+ if (size_read < 0) {
+ netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
+ __func__, size_read);
+ return size_read;
+ }
+
+ i += size_read;
+ offset += size_read;
+ }
+
+ return 0;
+}
+
+static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev,
+ const struct ethtool_module_eeprom *page_data,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_module_eeprom_query_params query;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 *data = page_data->data;
+ int size_read;
+ int i = 0;
+
+ if (!page_data->length)
+ return -EINVAL;
+
+ memset(data, 0, page_data->length);
+
+ query.offset = page_data->offset;
+ query.i2c_address = page_data->i2c_address;
+ query.bank = page_data->bank;
+ query.page = page_data->page;
+ while (i < page_data->length) {
+ query.size = page_data->length - i;
+ size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i);
+
+ /* Done reading; return how many bytes were read */
+ if (!size_read)
+ return i;
+
+ if (size_read == -EINVAL)
+ return -EINVAL;
+ if (size_read < 0) {
+ netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n",
+ __func__, size_read);
+ return i;
+ }
+
+ i += size_read;
+ query.offset += size_read;
+ }
+
+ return i;
+}
+
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->netdev;
+ const struct firmware *fw;
+ int err;
+
+ if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+ return -EOPNOTSUPP;
+
+ err = request_firmware_direct(&fw, flash->data, &dev->dev);
+ if (err)
+ return err;
+
+ dev_hold(dev);
+ rtnl_unlock();
+
+ err = mlx5_firmware_flash(mdev, fw, NULL);
+ release_firmware(fw);
+
+ rtnl_lock();
+ dev_put(dev);
+ return err;
+}
+
+static int mlx5e_flash_device(struct net_device *dev,
+ struct ethtool_flash *flash)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_flash_device(priv, flash);
+}
+
+static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
+ bool is_rx_cq)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ u8 cq_period_mode, current_cq_period_mode;
+ struct mlx5e_params new_params;
+
+ if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ return -EOPNOTSUPP;
+
+ cq_period_mode = cqe_mode_to_period_mode(enable);
+
+ current_cq_period_mode = is_rx_cq ?
+ priv->channels.params.rx_cq_moderation.cq_period_mode :
+ priv->channels.params.tx_cq_moderation.cq_period_mode;
+
+ if (cq_period_mode == current_cq_period_mode)
+ return 0;
+
+ new_params = priv->channels.params;
+ if (is_rx_cq)
+ mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode);
+ else
+ mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode);
+
+ return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+}
+
+static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+ return set_pflag_cqe_based_moder(netdev, enable, false);
+}
+
+static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+ return set_pflag_cqe_based_moder(netdev, enable, true);
+}
+
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val, bool rx_filter)
+{
+ bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS);
+ struct mlx5e_params new_params;
+ int err = 0;
+
+ if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
+ return new_val ? -EOPNOTSUPP : 0;
+
+ if (curr_val == new_val)
+ return 0;
+
+ if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) {
+ netdev_err(priv->netdev,
+ "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n");
+ return -EINVAL;
+ }
+
+ if (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ netdev_warn(priv->netdev, "Can't set CQE compression with HW-GRO, disable it first.\n");
+ return -EINVAL;
+ }
+
+ new_params = priv->channels.params;
+ MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+ if (rx_filter)
+ new_params.ptp_rx = new_val;
+
+ if (new_params.ptp_rx == priv->channels.params.ptp_rx)
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+ else
+ err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+ &new_params.ptp_rx, true);
+ if (err)
+ return err;
+
+ mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+ MLX5E_GET_PFLAG(&priv->channels.params,
+ MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
+ return 0;
+}
+
+static int set_pflag_rx_cqe_compress(struct net_device *netdev,
+ bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ bool rx_filter;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, cqe_compression))
+ return -EOPNOTSUPP;
+
+ rx_filter = priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE;
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, enable, rx_filter);
+ if (err)
+ return err;
+
+ priv->channels.params.rx_cqe_compress_def = enable;
+
+ return 0;
+}
+
+static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params new_params;
+
+ if (enable) {
+ /* Checking the regular RQ here; mlx5e_validate_xsk_param called
+ * from mlx5e_open_xsk will check for each XSK queue, and
+ * mlx5e_safe_switch_params will be reverted if any check fails.
+ */
+ int err = mlx5e_mpwrq_validate_regular(mdev, &priv->channels.params);
+
+ if (err)
+ return err;
+ } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
+ return -EINVAL;
+ }
+
+ new_params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
+ mlx5e_set_rq_type(mdev, &new_params);
+
+ return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+}
+
+static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels *channels = &priv->channels;
+ struct mlx5e_channel *c;
+ int i;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
+ priv->channels.params.xdp_prog)
+ return 0;
+
+ for (i = 0; i < channels->num; i++) {
+ c = channels->c[i];
+ if (enable)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ else
+ __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ }
+
+ return 0;
+}
+
+static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params new_params;
+
+ if (enable && !mlx5e_tx_mpwqe_supported(mdev))
+ return -EOPNOTSUPP;
+
+ new_params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_params, flag, enable);
+
+ return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+}
+
+static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+}
+
+static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable);
+}
+
+static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params new_params;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
+ return -EOPNOTSUPP;
+
+ /* Don't allow changing the PTP state if HTB offload is active, because
+ * the numbering of the QoS SQs would change while per-queue qdiscs are
+ * still attached.
+ */
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ new_params = priv->channels.params;
+ /* Don't allow enabling TX-port-TS if MQPRIO mode channel offload is
+ * active, since it defines explicitly which TC accepts the packet.
+ * This conflicts with TX-port-TS hijacking the PTP traffic to a specific
+ * HW TX-queue.
+ */
+ if (enable && new_params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+ netdev_err(priv->netdev,
+ "%s: MQPRIO mode channel offload is active, cannot set the TX-port-TS\n",
+ __func__);
+ return -EINVAL;
+ }
+ MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable);
+ /* No need to verify SQ stop room as
+ * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both
+ * have the same log_sq_size.
+ */
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_num_channels_changed_ctx, NULL, true);
+ if (!err)
+ priv->tx_ptp_opened = true;
+
+ return err;
+}
+
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
+ { "rx_cqe_moder", set_pflag_rx_cqe_based_moder },
+ { "tx_cqe_moder", set_pflag_tx_cqe_based_moder },
+ { "rx_cqe_compress", set_pflag_rx_cqe_compress },
+ { "rx_striding_rq", set_pflag_rx_striding_rq },
+ { "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
+ { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
+ { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe },
+ { "tx_port_ts", set_pflag_tx_port_ts },
+};
+
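+/* Driver private flags.  The table above maps each flag name to its handler;
+ * mlx5e_handle_pflag() below applies a single flag if it changed.
+ * Illustrative userspace usage (interface name is arbitrary):
+ * "ethtool --set-priv-flags eth0 rx_cqe_compress on".
+ */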
+static int mlx5e_handle_pflag(struct net_device *netdev,
+ u32 wanted_flags,
+ enum mlx5e_priv_flag flag)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ bool enable = !!(wanted_flags & BIT(flag));
+ u32 changes = wanted_flags ^ priv->channels.params.pflags;
+ int err;
+
+ if (!(changes & BIT(flag)))
+ return 0;
+
+ err = mlx5e_priv_flags[flag].handler(netdev, enable);
+ if (err) {
+ netdev_err(netdev, "%s private flag '%s' failed err %d\n",
+ enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err);
+ return err;
+ }
+
+ MLX5E_SET_PFLAG(&priv->channels.params, flag, enable);
+ return 0;
+}
+
+static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ enum mlx5e_priv_flag pflag;
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+ for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) {
+ err = mlx5e_handle_pflag(netdev, pflags, pflag);
+ if (err)
+ break;
+ }
+
+ mutex_unlock(&priv->state_lock);
+
+ /* Private flag changes can affect other netdev features; re-evaluate them. */
+ netdev_update_features(netdev);
+
+ return err;
+}
+
+static u32 mlx5e_get_priv_flags(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return priv->channels.params.pflags;
+}
+
+int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+ u32 *rule_locs)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part
+ * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
+ * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
+ * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
+ */
+ if (info->cmd == ETHTOOL_GRXRINGS) {
+ info->data = priv->channels.params.num_channels;
+ return 0;
+ }
+
+ return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs);
+}
+
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_rxnfc(priv, cmd);
+}
+
+static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode)
+{
+ struct mlx5_ifc_pddr_troubleshooting_page_bits *pddr_troubleshooting_page;
+ u32 in[MLX5_ST_SZ_DW(pddr_reg)] = {};
+ u32 out[MLX5_ST_SZ_DW(pddr_reg)];
+ int err;
+
+ MLX5_SET(pddr_reg, in, local_port, 1);
+ MLX5_SET(pddr_reg, in, page_select,
+ MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE);
+
+ pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, in, page_data);
+ MLX5_SET(pddr_troubleshooting_page, pddr_troubleshooting_page,
+ group_opcode, MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR);
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PDDR, 0, 0);
+ if (err)
+ return err;
+
+ pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, out, page_data);
+ *status_opcode = MLX5_GET(pddr_troubleshooting_page, pddr_troubleshooting_page,
+ status_opcode);
+ return 0;
+}
+
+struct mlx5e_ethtool_link_ext_state_opcode_mapping {
+ u32 status_opcode;
+ enum ethtool_link_ext_state link_ext_state;
+ u8 link_ext_substate;
+};
+
+static const struct mlx5e_ethtool_link_ext_state_opcode_mapping
+mlx5e_link_ext_state_opcode_map[] = {
+ /* States relating to the autonegotiation or issues therein */
+ {2, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED},
+ {3, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED},
+ {4, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED},
+ {36, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE},
+ {38, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE},
+ {39, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD},
+
+ /* Failure during link training */
+ {5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED},
+ {6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT},
+ {7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY},
+ {8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0},
+ {14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT},
+
+ /* Logical mismatch in physical coding sublayer or forward error correction sublayer */
+ {9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK},
+ {10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK},
+ {11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS},
+ {12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED},
+ {13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED},
+
+ /* Signal integrity issues */
+ {15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0},
+ {17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS},
+ {42, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE},
+
+ /* No cable connected */
+ {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0},
+
+ /* Failure is related to cable, e.g., unsupported cable */
+ {16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+ {1031, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0},
+
+ /* Failure is related to EEPROM, e.g., failure during reading or parsing the data */
+ {1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0},
+
+ /* Failure during calibration algorithm */
+ {23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0},
+
+ /* The hardware is not able to provide the power required from cable or module */
+ {1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0},
+
+ /* The module is overheated */
+ {1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0},
+};
+
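+/* Copy one opcode-map entry into the ethtool extended link-state structure,
+ * filling the substate member that matches the reported state.  ethtool can
+ * surface this as the reason the link is down (exact reporting depends on the
+ * userspace ethtool version).
+ */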
+static void
+mlx5e_set_link_ext_state(struct mlx5e_ethtool_link_ext_state_opcode_mapping
+ link_ext_state_mapping,
+ struct ethtool_link_ext_state_info *link_ext_state_info)
+{
+ switch (link_ext_state_mapping.link_ext_state) {
+ case ETHTOOL_LINK_EXT_STATE_AUTONEG:
+ link_ext_state_info->autoneg =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE:
+ link_ext_state_info->link_training =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH:
+ link_ext_state_info->link_logical_mismatch =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY:
+ link_ext_state_info->bad_signal_integrity =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE:
+ link_ext_state_info->cable_issue =
+ link_ext_state_mapping.link_ext_substate;
+ break;
+ default:
+ break;
+ }
+
+ link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state;
+}
+
+static int
+mlx5e_get_link_ext_state(struct net_device *dev,
+ struct ethtool_link_ext_state_info *link_ext_state_info)
+{
+ struct mlx5e_ethtool_link_ext_state_opcode_mapping link_ext_state_mapping;
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ u32 status_opcode = 0;
+ int i;
+
+ /* Exit without data if the interface state is OK, since no extended data is
+ * available in that case.
+ */
+ if (netif_carrier_ok(dev))
+ return -ENODATA;
+
+ if (query_port_status_opcode(priv->mdev, &status_opcode) ||
+ !status_opcode)
+ return -ENODATA;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5e_link_ext_state_opcode_map); i++) {
+ link_ext_state_mapping = mlx5e_link_ext_state_opcode_map[i];
+ if (link_ext_state_mapping.status_opcode == status_opcode) {
+ mlx5e_set_link_ext_state(link_ext_state_mapping,
+ link_ext_state_info);
+ return 0;
+ }
+ }
+
+ return -ENODATA;
+}
+
+static void mlx5e_get_eth_phy_stats(struct net_device *netdev,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_eth_phy_get(priv, phy_stats);
+}
+
+static void mlx5e_get_eth_mac_stats(struct net_device *netdev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_eth_mac_get(priv, mac_stats);
+}
+
+static void mlx5e_get_eth_ctrl_stats(struct net_device *netdev,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_eth_ctrl_get(priv, ctrl_stats);
+}
+
+static void mlx5e_get_rmon_stats(struct net_device *netdev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_rmon_get(priv, rmon_stats, ranges);
+}
+
+const struct ethtool_ops mlx5e_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE |
+ ETHTOOL_COALESCE_USE_CQE,
+ .get_drvinfo = mlx5e_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_link_ext_state = mlx5e_get_link_ext_state,
+ .get_strings = mlx5e_get_strings,
+ .get_sset_count = mlx5e_get_sset_count,
+ .get_ethtool_stats = mlx5e_get_ethtool_stats,
+ .get_ringparam = mlx5e_get_ringparam,
+ .set_ringparam = mlx5e_set_ringparam,
+ .get_channels = mlx5e_get_channels,
+ .set_channels = mlx5e_set_channels,
+ .get_coalesce = mlx5e_get_coalesce,
+ .set_coalesce = mlx5e_set_coalesce,
+ .get_link_ksettings = mlx5e_get_link_ksettings,
+ .set_link_ksettings = mlx5e_set_link_ksettings,
+ .get_rxfh_key_size = mlx5e_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size,
+ .get_rxfh = mlx5e_get_rxfh,
+ .set_rxfh = mlx5e_set_rxfh,
+ .get_rxfh_context = mlx5e_get_rxfh_context,
+ .set_rxfh_context = mlx5e_set_rxfh_context,
+ .get_rxnfc = mlx5e_get_rxnfc,
+ .set_rxnfc = mlx5e_set_rxnfc,
+ .get_tunable = mlx5e_get_tunable,
+ .set_tunable = mlx5e_set_tunable,
+ .get_pause_stats = mlx5e_get_pause_stats,
+ .get_pauseparam = mlx5e_get_pauseparam,
+ .set_pauseparam = mlx5e_set_pauseparam,
+ .get_ts_info = mlx5e_get_ts_info,
+ .set_phys_id = mlx5e_set_phys_id,
+ .get_wol = mlx5e_get_wol,
+ .set_wol = mlx5e_set_wol,
+ .get_module_info = mlx5e_get_module_info,
+ .get_module_eeprom = mlx5e_get_module_eeprom,
+ .get_module_eeprom_by_page = mlx5e_get_module_eeprom_by_page,
+ .flash_device = mlx5e_flash_device,
+ .get_priv_flags = mlx5e_get_priv_flags,
+ .set_priv_flags = mlx5e_set_priv_flags,
+ .self_test = mlx5e_self_test,
+ .get_msglevel = mlx5e_get_msglevel,
+ .set_msglevel = mlx5e_set_msglevel,
+ .get_fec_stats = mlx5e_get_fec_stats,
+ .get_fecparam = mlx5e_get_fecparam,
+ .set_fecparam = mlx5e_set_fecparam,
+ .get_eth_phy_stats = mlx5e_get_eth_phy_stats,
+ .get_eth_mac_stats = mlx5e_get_eth_mac_stats,
+ .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats,
+ .get_rmon_stats = mlx5e_get_rmon_stats,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
new file mode 100644
index 000000000..eba601487
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -0,0 +1,1582 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/mpfs.h>
+#include "en_tc.h"
+#include "lib/mpfs.h"
+#include "en/ptp.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_flow_steering {
+ struct work_struct set_rx_mode_work;
+ bool state_destroy;
+ bool vlan_strip_disable;
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_namespace *egress_ns;
+#ifdef CONFIG_MLX5_EN_RXNFC
+ struct mlx5e_ethtool_steering *ethtool;
+#endif
+ struct mlx5e_tc_table *tc;
+ struct mlx5e_promisc_table promisc;
+ struct mlx5e_vlan_table *vlan;
+ struct mlx5e_l2_table l2;
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_ttc_table *inner_ttc;
+#ifdef CONFIG_MLX5_EN_ARFS
+ struct mlx5e_arfs_tables *arfs;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_accel_fs_tcp *accel_tcp;
+#endif
+ struct mlx5e_fs_udp *udp;
+ struct mlx5e_fs_any *any;
+ struct mlx5e_ptp_fs *ptp_fs;
+};
+
+static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_l2_rule *ai, int type);
+static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_l2_rule *ai);
+
+enum {
+ MLX5E_FULLMATCH = 0,
+ MLX5E_ALLMULTI = 1,
+};
+
+enum {
+ MLX5E_UC = 0,
+ MLX5E_MC_IPV4 = 1,
+ MLX5E_MC_IPV6 = 2,
+ MLX5E_MC_OTHER = 3,
+};
+
+enum {
+ MLX5E_ACTION_NONE = 0,
+ MLX5E_ACTION_ADD = 1,
+ MLX5E_ACTION_DEL = 2,
+};
+
+struct mlx5e_l2_hash_node {
+ struct hlist_node hlist;
+ u8 action;
+ struct mlx5e_l2_rule ai;
+ bool mpfs;
+};
+
+static inline int mlx5e_hash_l2(const u8 *addr)
+{
+ return addr[5];
+}
+
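+/* The L2 address hash buckets unicast/multicast addresses by their last byte
+ * (256 buckets).  Adding an address that is already present resets it to
+ * MLX5E_ACTION_NONE; a new address is queued as MLX5E_ACTION_ADD to be
+ * programmed into the L2 flow table later.
+ */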
+static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr)
+{
+ struct mlx5e_l2_hash_node *hn;
+ int ix = mlx5e_hash_l2(addr);
+ int found = 0;
+
+ hlist_for_each_entry(hn, &hash[ix], hlist)
+ if (ether_addr_equal_64bits(hn->ai.addr, addr)) {
+ found = 1;
+ break;
+ }
+
+ if (found) {
+ hn->action = MLX5E_ACTION_NONE;
+ return;
+ }
+
+ hn = kzalloc(sizeof(*hn), GFP_ATOMIC);
+ if (!hn)
+ return;
+
+ ether_addr_copy(hn->ai.addr, addr);
+ hn->action = MLX5E_ACTION_ADD;
+
+ hlist_add_head(&hn->hlist, &hash[ix]);
+}
+
+static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn)
+{
+ hlist_del(&hn->hlist);
+ kfree(hn);
+}
+
+struct mlx5e_vlan_table {
+ struct mlx5e_flow_table ft;
+ DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
+ DECLARE_BITMAP(active_svlans, VLAN_N_VID);
+ struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *untagged_rule;
+ struct mlx5_flow_handle *any_cvlan_rule;
+ struct mlx5_flow_handle *any_svlan_rule;
+ struct mlx5_flow_handle *trap_rule;
+ bool cvlan_filter_disabled;
+};
+
+unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan)
+{
+ return vlan->active_svlans;
+}
+
+struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan)
+{
+ return vlan->ft.t;
+}
+
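+/* Push the set of active C-VLANs into the NIC vport context.  The list is
+ * truncated to the hardware limit of 2^log_max_vlan_list entries, with a
+ * warning if VLANs had to be dropped.
+ */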
+static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs)
+{
+ int max_list_size;
+ int list_size;
+ u16 *vlans;
+ int vlan;
+ int err;
+ int i;
+
+ list_size = 0;
+ for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID)
+ list_size++;
+
+ max_list_size = 1 << MLX5_CAP_GEN(fs->mdev, log_max_vlan_list);
+
+ if (list_size > max_list_size) {
+ fs_warn(fs, "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
+ list_size = max_list_size;
+ }
+
+ vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+ if (!vlans)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) {
+ if (i >= list_size)
+ break;
+ vlans[i++] = vlan;
+ }
+
+ err = mlx5_modify_nic_vport_vlans(fs->mdev, vlans, list_size);
+ if (err)
+ fs_err(fs, "Failed to modify vport vlans list err(%d)\n",
+ err);
+
+ kvfree(vlans);
+ return err;
+}
+
+enum mlx5e_vlan_rule_type {
+ MLX5E_VLAN_RULE_TYPE_UNTAGGED,
+ MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID,
+ MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID,
+};
+
+static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5e_vlan_rule_type rule_type,
+ u16 vid, struct mlx5_flow_spec *spec)
+{
+ struct mlx5_flow_table *ft = fs->vlan->ft.t;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rule_p;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ int err = 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = fs->l2.ft.t;
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ /* cvlan_tag enabled in match criteria and
+ * disabled in match value means both S & C tags
+ * don't exist (i.e. the packet carries neither tag)
+ */
+ rule_p = &fs->vlan->untagged_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ rule_p = &fs->vlan->any_cvlan_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ rule_p = &fs->vlan->any_svlan_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+ rule_p = &fs->vlan->active_svlans_rule[vid];
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+ vid);
+ break;
+ default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */
+ rule_p = &fs->vlan->active_cvlans_rule[vid];
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+ vid);
+ break;
+ }
+
+ if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
+ return 0;
+
+ *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+
+ if (IS_ERR(*rule_p)) {
+ err = PTR_ERR(*rule_p);
+ *rule_p = NULL;
+ fs_err(fs, "%s: add rule failed\n", __func__);
+ }
+
+ return err;
+}
+
+static int mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID)
+ mlx5e_vport_context_update_vlans(fs);
+
+ err = __mlx5e_add_vlan_rule(fs, rule_type, vid, spec);
+
+ kvfree(spec);
+
+ return err;
+}
+
+static void mlx5e_fs_del_vlan_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ if (fs->vlan->untagged_rule) {
+ mlx5_del_flow_rules(fs->vlan->untagged_rule);
+ fs->vlan->untagged_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ if (fs->vlan->any_cvlan_rule) {
+ mlx5_del_flow_rules(fs->vlan->any_cvlan_rule);
+ fs->vlan->any_cvlan_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ if (fs->vlan->any_svlan_rule) {
+ mlx5_del_flow_rules(fs->vlan->any_svlan_rule);
+ fs->vlan->any_svlan_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+ if (fs->vlan->active_svlans_rule[vid]) {
+ mlx5_del_flow_rules(fs->vlan->active_svlans_rule[vid]);
+ fs->vlan->active_svlans_rule[vid] = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
+ if (fs->vlan->active_cvlans_rule[vid]) {
+ mlx5_del_flow_rules(fs->vlan->active_cvlans_rule[vid]);
+ fs->vlan->active_cvlans_rule[vid] = NULL;
+ }
+ mlx5e_vport_context_update_vlans(fs);
+ break;
+ }
+}
+
+static void mlx5e_fs_del_any_vid_rules(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+static int mlx5e_fs_add_any_vid_rules(struct mlx5e_flow_steering *fs)
+{
+ int err;
+
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ if (err)
+ return err;
+
+ return mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+static struct mlx5_flow_handle *
+mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num)
+{
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+ spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+ spec->flow_context.flow_tag = trap_id;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+ return rule;
+}
+
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
+{
+ struct mlx5_flow_table *ft = fs->vlan->ft.t;
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs->vlan->trap_rule = NULL;
+ fs_err(fs, "%s: add VLAN trap rule failed, err %d\n",
+ __func__, err);
+ return err;
+ }
+ fs->vlan->trap_rule = rule;
+ return 0;
+}
+
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs)
+{
+ if (fs->vlan->trap_rule) {
+ mlx5_del_flow_rules(fs->vlan->trap_rule);
+ fs->vlan->trap_rule = NULL;
+ }
+}
+
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
+{
+ struct mlx5_flow_table *ft = fs->l2.ft.t;
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs->l2.trap_rule = NULL;
+ fs_err(fs, "%s: add MAC trap rule failed, err %d\n",
+ __func__, err);
+ return err;
+ }
+ fs->l2.trap_rule = rule;
+ return 0;
+}
+
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs)
+{
+ if (fs->l2.trap_rule) {
+ mlx5_del_flow_rules(fs->l2.trap_rule);
+ fs->l2.trap_rule = NULL;
+ }
+}
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
+{
+ if (!fs->vlan->cvlan_filter_disabled)
+ return;
+
+ fs->vlan->cvlan_filter_disabled = false;
+ if (promisc)
+ return;
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+}
+
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
+{
+ if (!fs->vlan || fs->vlan->cvlan_filter_disabled)
+ return;
+
+ fs->vlan->cvlan_filter_disabled = true;
+ if (promisc)
+ return;
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+}
+
+static int mlx5e_vlan_rx_add_cvid(struct mlx5e_flow_steering *fs, u16 vid)
+{
+ int err;
+
+ set_bit(vid, fs->vlan->active_cvlans);
+
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ if (err)
+ clear_bit(vid, fs->vlan->active_cvlans);
+
+ return err;
+}
+
+static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev, u16 vid)
+{
+ int err;
+
+ set_bit(vid, fs->vlan->active_svlans);
+
+ err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ if (err) {
+ clear_bit(vid, fs->vlan->active_svlans);
+ return err;
+ }
+
+ /* Accepting an S-tag VID may require toggling dependent netdev
+ * features (e.g. C-tag VLAN stripping), so re-evaluate them.
+ */
+ netdev_update_features(netdev);
+ return err;
+}
+
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid)
+{
+ if (!fs->vlan) {
+ fs_err(fs, "Vlan doesn't exist\n");
+ return -EINVAL;
+ }
+
+ if (be16_to_cpu(proto) == ETH_P_8021Q)
+ return mlx5e_vlan_rx_add_cvid(fs, vid);
+ else if (be16_to_cpu(proto) == ETH_P_8021AD)
+ return mlx5e_vlan_rx_add_svid(fs, netdev, vid);
+
+ return -EOPNOTSUPP;
+}
+
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid)
+{
+ if (!fs->vlan) {
+ fs_err(fs, "Vlan doesn't exist\n");
+ return -EINVAL;
+ }
+
+ if (be16_to_cpu(proto) == ETH_P_8021Q) {
+ clear_bit(vid, fs->vlan->active_cvlans);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ } else if (be16_to_cpu(proto) == ETH_P_8021AD) {
+ clear_bit(vid, fs->vlan->active_svlans);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ netdev_update_features(netdev);
+ }
+
+ return 0;
+}
+
+static void mlx5e_fs_add_vlan_rules(struct mlx5e_flow_steering *fs)
+{
+ int i;
+
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) {
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ }
+
+ for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID)
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+ if (fs->vlan->cvlan_filter_disabled)
+ mlx5e_fs_add_any_vid_rules(fs);
+}
+
+static void mlx5e_del_vlan_rules(struct mlx5e_flow_steering *fs)
+{
+ int i;
+
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) {
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ }
+
+ for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID)
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+ WARN_ON_ONCE(fs->state_destroy);
+
+ mlx5e_remove_vlan_trap(fs);
+
+ /* Must be called after the DESTROY state bit is set and the
+ * set_rx_mode work has been scheduled and flushed.
+ */
+ if (fs->vlan->cvlan_filter_disabled)
+ mlx5e_fs_del_any_vid_rules(fs);
+}
+
+#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
+
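+/* Apply the pending add/del action recorded for one L2 address: install or
+ * remove its steering rule and, for unicast addresses, the MPFS entry.
+ */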
+static void mlx5e_execute_l2_action(struct mlx5e_flow_steering *fs,
+ struct mlx5e_l2_hash_node *hn)
+{
+ u8 action = hn->action;
+ u8 mac_addr[ETH_ALEN];
+ int l2_err = 0;
+
+ ether_addr_copy(mac_addr, hn->ai.addr);
+
+ switch (action) {
+ case MLX5E_ACTION_ADD:
+ mlx5e_add_l2_flow_rule(fs, &hn->ai, MLX5E_FULLMATCH);
+ if (!is_multicast_ether_addr(mac_addr)) {
+ l2_err = mlx5_mpfs_add_mac(fs->mdev, mac_addr);
+ hn->mpfs = !l2_err;
+ }
+ hn->action = MLX5E_ACTION_NONE;
+ break;
+
+ case MLX5E_ACTION_DEL:
+ if (!is_multicast_ether_addr(mac_addr) && hn->mpfs)
+ l2_err = mlx5_mpfs_del_mac(fs->mdev, mac_addr);
+ mlx5e_del_l2_flow_rule(fs, &hn->ai);
+ mlx5e_del_l2_from_hash(hn);
+ break;
+ }
+
+ if (l2_err)
+ fs_warn(fs, "MPFS, failed to %s mac %pM, err(%d)\n",
+ action == MLX5E_ACTION_ADD ? "add" : "del",
+ mac_addr, l2_err);
+}
+
+static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
+{
+ struct netdev_hw_addr *ha;
+
+ netif_addr_lock_bh(netdev);
+
+ mlx5e_add_l2_to_hash(fs->l2.netdev_uc, netdev->dev_addr);
+ netdev_for_each_uc_addr(ha, netdev)
+ mlx5e_add_l2_to_hash(fs->l2.netdev_uc, ha->addr);
+
+ netdev_for_each_mc_addr(ha, netdev)
+ mlx5e_add_l2_to_hash(fs->l2.netdev_mc, ha->addr);
+
+ netif_addr_unlock_bh(netdev);
+}
+
+static void mlx5e_fill_addr_array(struct mlx5e_flow_steering *fs, int list_type,
+ struct net_device *ndev,
+ u8 addr_array[][ETH_ALEN], int size)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int i = 0;
+ int hi;
+
+ addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc;
+
+ if (is_uc) /* Make sure our own address is pushed first */
+ ether_addr_copy(addr_array[i++], ndev->dev_addr);
+ else if (fs->l2.broadcast_enabled)
+ ether_addr_copy(addr_array[i++], ndev->broadcast);
+
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
+ if (ether_addr_equal(ndev->dev_addr, hn->ai.addr))
+ continue;
+ if (i >= size)
+ break;
+ ether_addr_copy(addr_array[i++], hn->ai.addr);
+ }
+}
+
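+/* Push the driver's UC/MC address hash into the NIC vport context, truncating
+ * the list to the device's advertised maximum if needed.
+ */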
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ int list_type)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct mlx5e_l2_hash_node *hn;
+ u8 (*addr_array)[ETH_ALEN] = NULL;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int max_size;
+ int size;
+ int err;
+ int hi;
+
+ size = is_uc ? 0 : (fs->l2.broadcast_enabled ? 1 : 0);
+ max_size = is_uc ?
+ 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_mc_list);
+
+ addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc;
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
+ size++;
+
+ if (size > max_size) {
+ fs_warn(fs, "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
+ size = max_size;
+ }
+
+ if (size) {
+ addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!addr_array) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mlx5e_fill_addr_array(fs, list_type, netdev, addr_array, size);
+ }
+
+ err = mlx5_modify_nic_vport_mac_list(fs->mdev, list_type, addr_array, size);
+out:
+ if (err)
+ fs_err(fs, "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
+ kfree(addr_array);
+}
+
+static void mlx5e_vport_context_update(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
+{
+ struct mlx5e_l2_table *ea = &fs->l2;
+
+ mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_UC);
+ mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_MC);
+ mlx5_modify_nic_vport_promisc(fs->mdev, 0,
+ ea->allmulti_enabled,
+ ea->promisc_enabled);
+}
+
+static void mlx5e_apply_netdev_addr(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i)
+ mlx5e_execute_l2_action(fs, hn);
+
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i)
+ mlx5e_execute_l2_action(fs, hn);
+}
+
+static void mlx5e_handle_netdev_addr(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
+{
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i)
+ hn->action = MLX5E_ACTION_DEL;
+ mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i)
+ hn->action = MLX5E_ACTION_DEL;
+
+ if (fs->state_destroy)
+ mlx5e_sync_netdev_addr(fs, netdev);
+
+ mlx5e_apply_netdev_addr(fs);
+}
+
+#define MLX5E_PROMISC_GROUP0_SIZE BIT(0)
+#define MLX5E_PROMISC_TABLE_SIZE MLX5E_PROMISC_GROUP0_SIZE
+
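+/* The promiscuous table carries a single match-all rule that forwards all
+ * traffic to the TTC table; it only exists while promiscuous mode is on.
+ */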
+static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_flow_table *ft = fs->promisc.ft.t;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rule_p;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = mlx5_get_ttc_flow_table(fs->ttc);
+
+ rule_p = &fs->promisc.rule;
+ *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(*rule_p)) {
+ err = PTR_ERR(*rule_p);
+ *rule_p = NULL;
+ fs_err(fs, "%s: add promiscuous rule failed\n", __func__);
+ }
+ kvfree(spec);
+ return err;
+}
+
+static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_flow_table *ft = &fs->promisc.ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_PROMISC_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ fs_err(fs, "fail to create promisc table err=%d\n", err);
+ return err;
+ }
+
+ err = mlx5e_add_promisc_rule(fs);
+ if (err)
+ goto err_destroy_promisc_table;
+
+ return 0;
+
+err_destroy_promisc_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return err;
+}
+
+static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs)
+{
+ if (WARN(!fs->promisc.rule, "Trying to remove non-existing promiscuous rule"))
+ return;
+ mlx5_del_flow_rules(fs->promisc.rule);
+ fs->promisc.rule = NULL;
+}
+
+static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs)
+{
+ if (!fs->promisc.ft.t)
+ return;
+ mlx5e_del_promisc_rule(fs);
+ mlx5_destroy_flow_table(fs->promisc.ft.t);
+ fs->promisc.ft.t = NULL;
+}
+
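+/* Reconcile the netdev RX-mode flags (promisc/allmulti) and address lists
+ * with the installed steering rules, then refresh the vport context.
+ */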
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev)
+{
+ struct mlx5e_l2_table *ea = &fs->l2;
+
+ bool rx_mode_enable = fs->state_destroy;
+ bool promisc_enabled = rx_mode_enable && (netdev->flags & IFF_PROMISC);
+ bool allmulti_enabled = rx_mode_enable && (netdev->flags & IFF_ALLMULTI);
+ bool broadcast_enabled = rx_mode_enable;
+
+ bool enable_promisc = !ea->promisc_enabled && promisc_enabled;
+ bool disable_promisc = ea->promisc_enabled && !promisc_enabled;
+ bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled;
+ bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled;
+ bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled;
+ bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled;
+ int err;
+
+ if (enable_promisc) {
+ err = mlx5e_create_promisc_table(fs);
+ if (err)
+ enable_promisc = false;
+ if (!fs->vlan_strip_disable && !err)
+ fs_warn_once(fs,
+ "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
+ }
+ if (enable_allmulti)
+ mlx5e_add_l2_flow_rule(fs, &ea->allmulti, MLX5E_ALLMULTI);
+ if (enable_broadcast)
+ mlx5e_add_l2_flow_rule(fs, &ea->broadcast, MLX5E_FULLMATCH);
+
+ mlx5e_handle_netdev_addr(fs, netdev);
+
+ if (disable_broadcast)
+ mlx5e_del_l2_flow_rule(fs, &ea->broadcast);
+ if (disable_allmulti)
+ mlx5e_del_l2_flow_rule(fs, &ea->allmulti);
+ if (disable_promisc)
+ mlx5e_destroy_promisc_table(fs);
+
+ ea->promisc_enabled = promisc_enabled;
+ ea->allmulti_enabled = allmulti_enabled;
+ ea->broadcast_enabled = broadcast_enabled;
+
+ mlx5e_vport_context_update(fs, netdev);
+}
+
+static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
+{
+ int i;
+
+ for (i = ft->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ft->g[i]))
+ mlx5_destroy_flow_group(ft->g[i]);
+ ft->g[i] = NULL;
+ }
+ ft->num_groups = 0;
+}
+
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev)
+{
+ ether_addr_copy(fs->l2.broadcast.addr, netdev->broadcast);
+}
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
+{
+ mlx5e_destroy_groups(ft);
+ kfree(ft->g);
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+}
+
+static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
+
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_NIC_PRIO;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss_inner(rx_res,
+ tt);
+ }
+}
+
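+/* Fill the TTC parameters: a TIR destination per traffic type, plus redirects
+ * to the inner TTC table for tunneled traffic when the device supports it.
+ */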
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params, bool tunnel)
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
+
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ ft_attr->level = MLX5E_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_NIC_PRIO;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss(rx_res, tt);
+ }
+
+ ttc_params->inner_ttc = tunnel;
+ if (!tunnel || !mlx5_tunnel_inner_ft_supported(fs->mdev))
+ return;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ ttc_params->tunnel_dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->tunnel_dests[tt].ft =
+ mlx5_get_ttc_flow_table(fs->inner_ttc);
+ }
+}
+
+static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_l2_rule *ai)
+{
+ if (!IS_ERR_OR_NULL(ai->rule)) {
+ mlx5_del_flow_rules(ai->rule);
+ ai->rule = NULL;
+ }
+}
+
+static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_l2_rule *ai, int type)
+{
+ struct mlx5_flow_table *ft = fs->l2.ft.t;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 *mc_dmac;
+ u8 *mv_dmac;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dmac_47_16);
+ mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dmac_47_16);
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = mlx5_get_ttc_flow_table(fs->ttc);
+
+ switch (type) {
+ case MLX5E_FULLMATCH:
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ eth_broadcast_addr(mc_dmac);
+ ether_addr_copy(mv_dmac, ai->addr);
+ break;
+
+ case MLX5E_ALLMULTI:
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ mc_dmac[0] = 0x01;
+ mv_dmac[0] = 0x01;
+ break;
+ }
+
+ ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(ai->rule)) {
+ fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac);
+ err = PTR_ERR(ai->rule);
+ ai->rule = NULL;
+ }
+
+ kvfree(spec);
+
+ return err;
+}
+
+#define MLX5E_NUM_L2_GROUPS 3
+#define MLX5E_L2_GROUP1_SIZE BIT(15)
+#define MLX5E_L2_GROUP2_SIZE BIT(0)
+#define MLX5E_L2_GROUP_TRAP_SIZE BIT(0) /* must be last */
+#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\
+ MLX5E_L2_GROUP2_SIZE +\
+ MLX5E_L2_GROUP_TRAP_SIZE)
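+/* The L2 table uses three flow groups: full DMAC match, multicast (match on
+ * the multicast bit only), and a wildcard group reserved for trap rules.
+ */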
+static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &l2_table->ft;
+ int ix = 0;
+ u8 *mc_dmac;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ mc_dmac = MLX5_ADDR_OF(fte_match_param, mc,
+ outer_headers.dmac_47_16);
+ /* Flow Group for full match */
+ eth_broadcast_addr(mc_dmac);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ /* Flow Group for allmulti */
+ eth_zero_addr(mc_dmac);
+ mc_dmac[0] = 0x01;
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ /* Flow Group for l2 traps */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP_TRAP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err_destroy_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+ kvfree(in);
+ kfree(ft->g);
+
+ return err;
+}
+
+static void mlx5e_destroy_l2_table(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_destroy_flow_table(&fs->l2.ft);
+}
+
+static int mlx5e_create_l2_table(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_l2_table *l2_table = &fs->l2;
+ struct mlx5e_flow_table *ft = &l2_table->ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_L2_TABLE_SIZE;
+ ft_attr.level = MLX5E_L2_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(fs->ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_l2_table_groups(l2_table);
+ if (err)
+ goto err_destroy_flow_table;
+
+ return 0;
+
+err_destroy_flow_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return err;
+}
+
+#define MLX5E_NUM_VLAN_GROUPS 5
+#define MLX5E_VLAN_GROUP0_SIZE BIT(12)
+#define MLX5E_VLAN_GROUP1_SIZE BIT(12)
+#define MLX5E_VLAN_GROUP2_SIZE BIT(1)
+#define MLX5E_VLAN_GROUP3_SIZE BIT(0)
+#define MLX5E_VLAN_GROUP_TRAP_SIZE BIT(0) /* must be last */
+#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\
+ MLX5E_VLAN_GROUP1_SIZE +\
+ MLX5E_VLAN_GROUP2_SIZE +\
+ MLX5E_VLAN_GROUP3_SIZE +\
+ MLX5E_VLAN_GROUP_TRAP_SIZE)
+
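+/* VLAN table flow groups, in order: C-tag + VID match, S-tag + VID match,
+ * any C-tag, any S-tag, and a final wildcard group used for the VLAN trap.
+ */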
+static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in,
+ int inlen)
+{
+ int err;
+ int ix = 0;
+ u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP0_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP_TRAP_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ return 0;
+
+err_destroy_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+
+ return err;
+}
+
+static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft)
+{
+ u32 *in;
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = __mlx5e_create_vlan_table_groups(ft, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_fs_create_vlan_table(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
+ int err;
+
+ ft = &fs->vlan->ft;
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
+ ft_attr.level = MLX5E_VLAN_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(fs->ns, &ft_attr);
+ if (IS_ERR(ft->t))
+ return PTR_ERR(ft->t);
+
+ ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g) {
+ err = -ENOMEM;
+ goto err_destroy_vlan_table;
+ }
+
+ err = mlx5e_create_vlan_table_groups(ft);
+ if (err)
+ goto err_free_g;
+
+ mlx5e_fs_add_vlan_rules(fs);
+
+ return 0;
+
+err_free_g:
+ kfree(ft->g);
+err_destroy_vlan_table:
+ mlx5_destroy_flow_table(ft->t);
+
+ return err;
+}
+
+static void mlx5e_destroy_vlan_table(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_del_vlan_rules(fs);
+ mlx5e_destroy_flow_table(&fs->vlan->ft);
+}
+
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_flow_steering *fs)
+{
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
+ return;
+ mlx5_destroy_ttc_table(fs->inner_ttc);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs)
+{
+ mlx5_destroy_ttc_table(fs->ttc);
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
+{
+ struct ttc_params ttc_params = {};
+
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
+ return 0;
+
+ mlx5e_set_inner_ttc_params(fs, rx_res, &ttc_params);
+ fs->inner_ttc = mlx5_create_inner_ttc_table(fs->mdev,
+ &ttc_params);
+ if (IS_ERR(fs->inner_ttc))
+ return PTR_ERR(fs->inner_ttc);
+ return 0;
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
+{
+ struct ttc_params ttc_params = {};
+
+ mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true);
+ fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params);
+ if (IS_ERR(fs->ttc))
+ return PTR_ERR(fs->ttc);
+ return 0;
+}
+
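+/* Build the RX steering tables (aRFS, inner/outer TTC, L2, VLAN, PTP,
+ * ethtool); an aRFS failure only disables NTUPLE, later failures unwind
+ * the already-created tables in reverse order.
+ */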
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev)
+{
+ struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ int err;
+
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ mlx5e_fs_set_ns(fs, ns, false);
+ err = mlx5e_arfs_create_tables(fs, rx_res,
+ !!(netdev->hw_features & NETIF_F_NTUPLE));
+ if (err) {
+ fs_err(fs, "Failed to create arfs tables, err=%d\n", err);
+ netdev->hw_features &= ~NETIF_F_NTUPLE;
+ }
+
+ err = mlx5e_create_inner_ttc_table(fs, rx_res);
+ if (err) {
+ fs_err(fs, "Failed to create inner ttc table, err=%d\n", err);
+ goto err_destroy_arfs_tables;
+ }
+
+ err = mlx5e_create_ttc_table(fs, rx_res);
+ if (err) {
+ fs_err(fs, "Failed to create ttc table, err=%d\n", err);
+ goto err_destroy_inner_ttc_table;
+ }
+
+ err = mlx5e_create_l2_table(fs);
+ if (err) {
+ fs_err(fs, "Failed to create l2 table, err=%d\n", err);
+ goto err_destroy_ttc_table;
+ }
+
+ err = mlx5e_fs_create_vlan_table(fs);
+ if (err) {
+ fs_err(fs, "Failed to create vlan table, err=%d\n", err);
+ goto err_destroy_l2_table;
+ }
+
+ err = mlx5e_ptp_alloc_rx_fs(fs, profile);
+ if (err)
+ goto err_destroy_vlan_table;
+
+ mlx5e_ethtool_init_steering(fs);
+
+ return 0;
+
+err_destroy_vlan_table:
+ mlx5e_destroy_vlan_table(fs);
+err_destroy_l2_table:
+ mlx5e_destroy_l2_table(fs);
+err_destroy_ttc_table:
+ mlx5e_destroy_ttc_table(fs);
+err_destroy_inner_ttc_table:
+ mlx5e_destroy_inner_ttc_table(fs);
+err_destroy_arfs_tables:
+ mlx5e_arfs_destroy_tables(fs, !!(netdev->hw_features & NETIF_F_NTUPLE));
+
+ return err;
+}
+
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile)
+{
+ mlx5e_ptp_free_rx_fs(fs, profile);
+ mlx5e_destroy_vlan_table(fs);
+ mlx5e_destroy_l2_table(fs);
+ mlx5e_destroy_ttc_table(fs);
+ mlx5e_destroy_inner_ttc_table(fs);
+ mlx5e_arfs_destroy_tables(fs, ntuple);
+ mlx5e_ethtool_cleanup_steering(fs);
+}
+
+static int mlx5e_fs_vlan_alloc(struct mlx5e_flow_steering *fs)
+{
+ fs->vlan = kvzalloc(sizeof(*fs->vlan), GFP_KERNEL);
+ if (!fs->vlan)
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_fs_vlan_free(struct mlx5e_flow_steering *fs)
+{
+ kvfree(fs->vlan);
+}
+
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs)
+{
+ return fs->vlan;
+}
+
+static int mlx5e_fs_tc_alloc(struct mlx5e_flow_steering *fs)
+{
+ fs->tc = mlx5e_tc_table_alloc();
+ if (IS_ERR(fs->tc))
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_fs_tc_free(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_tc_table_free(fs->tc);
+}
+
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs)
+{
+ return fs->tc;
+}
+
+#ifdef CONFIG_MLX5_EN_RXNFC
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{
+ return mlx5e_ethtool_alloc(&fs->ethtool);
+}
+
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_ethtool_free(fs->ethtool);
+}
+
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs)
+{
+ return fs->ethtool;
+}
+#else
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{ return 0; }
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { }
+#endif
+
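+/* Allocate the flow steering context and the optional VLAN/TC/ethtool
+ * sub-objects, according to the profile's feature capabilities.
+ */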
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy)
+{
+ struct mlx5e_flow_steering *fs;
+ int err;
+
+ fs = kvzalloc(sizeof(*fs), GFP_KERNEL);
+ if (!fs)
+ goto err;
+
+ fs->mdev = mdev;
+ fs->state_destroy = state_destroy;
+ if (mlx5e_profile_feature_cap(profile, FS_VLAN)) {
+ err = mlx5e_fs_vlan_alloc(fs);
+ if (err)
+ goto err_free_fs;
+ }
+
+ if (mlx5e_profile_feature_cap(profile, FS_TC)) {
+ err = mlx5e_fs_tc_alloc(fs);
+ if (err)
+ goto err_free_vlan;
+ }
+
+ err = mlx5e_fs_ethtool_alloc(fs);
+ if (err)
+ goto err_free_tc;
+
+ return fs;
+err_free_tc:
+ mlx5e_fs_tc_free(fs);
+err_free_vlan:
+ mlx5e_fs_vlan_free(fs);
+err_free_fs:
+ kvfree(fs);
+err:
+ return NULL;
+}
+
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs)
+{
+ if (!fs)
+ return;
+ mlx5e_fs_ethtool_free(fs);
+ mlx5e_fs_tc_free(fs);
+ mlx5e_fs_vlan_free(fs);
+ kvfree(fs);
+}
+
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs)
+{
+ return &fs->l2;
+}
+
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress)
+{
+ return egress ? fs->egress_ns : fs->ns;
+}
+
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress)
+{
+ if (!egress)
+ fs->ns = ns;
+ else
+ fs->egress_ns = ns;
+}
+
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner)
+{
+ return inner ? fs->inner_ttc : fs->ttc;
+}
+
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner)
+{
+ if (!inner)
+ fs->ttc = ttc;
+ else
+ fs->inner_ttc = ttc;
+}
+
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs)
+{
+ return fs->arfs;
+}
+
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs)
+{
+ fs->arfs = arfs;
+}
+#endif
+
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs)
+{
+ return fs->ptp_fs;
+}
+
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs)
+{
+ fs->ptp_fs = ptp_fs;
+}
+
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs)
+{
+ return fs->any;
+}
+
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any)
+{
+ fs->any = any;
+}
+
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs)
+{
+ return fs->accel_tcp;
+}
+
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp)
+{
+ fs->accel_tcp = accel_tcp;
+}
+#endif
+
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy)
+{
+ fs->state_destroy = state_destroy;
+}
+
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs,
+ bool vlan_strip_disable)
+{
+ fs->vlan_strip_disable = vlan_strip_disable;
+}
+
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs)
+{
+ return fs->udp;
+}
+
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp)
+{
+ fs->udp = udp;
+}
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs)
+{
+ return fs->mdev;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
new file mode 100644
index 000000000..aac32e505
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -0,0 +1,1013 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+#include "en/params.h"
+#include "en/xsk/pool.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_ethtool_table {
+ struct mlx5_flow_table *ft;
+ int num_rules;
+};
+
+#define ETHTOOL_NUM_L3_L4_FTS 7
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+ struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
+ struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
+ struct list_head rules;
+ int tot_num_rules;
+};
+
+static int flow_type_to_traffic_type(u32 flow_type);
+
+static u32 flow_type_mask(u32 flow_type)
+{
+ return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+}
+
+struct mlx5e_ethtool_rule {
+ struct list_head list;
+ struct ethtool_rx_flow_spec flow_spec;
+ struct mlx5_flow_handle *rule;
+ struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5e_rss *rss;
+};
+
+static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
+{
+ if (!--eth_ft->num_rules) {
+ mlx5_destroy_flow_table(eth_ft->ft);
+ eth_ft->ft = NULL;
+ }
+}
+
+#define MLX5E_ETHTOOL_L3_L4_PRIO 0
+#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS)
+#define MLX5E_ETHTOOL_NUM_ENTRIES 64000
+#define MLX5E_ETHTOOL_NUM_GROUPS 10
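+/* Pick (and lazily create) the ethtool flow table for a rule; rules with more
+ * match tuples are placed in higher-priority (lower numbered) tables.
+ */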
+static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs,
+ int num_tuples)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ int max_tuples;
+ int table_size;
+ int prio;
+
+ switch (flow_type_mask(fs->flow_type)) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+ prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+ eth_ft = &ethtool->l3_l4_ft[prio];
+ break;
+ case IP_USER_FLOW:
+ case IPV6_USER_FLOW:
+ max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+ prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+ eth_ft = &ethtool->l3_l4_ft[prio];
+ break;
+ case ETHER_FLOW:
+ max_tuples = ETHTOOL_NUM_L2_FTS;
+ prio = max_tuples - num_tuples;
+ eth_ft = &ethtool->l2_ft[prio];
+ prio += MLX5E_ETHTOOL_L2_PRIO;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ eth_ft->num_rules++;
+ if (eth_ft->ft)
+ return eth_ft;
+
+ ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_ETHTOOL);
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev,
+ flow_table_properties_nic_receive.log_max_ft_size)),
+ MLX5E_ETHTOOL_NUM_ENTRIES);
+
+ ft_attr.prio = prio;
+ ft_attr.max_fte = table_size;
+ ft_attr.autogroup.max_num_groups = MLX5E_ETHTOOL_NUM_GROUPS;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return (void *)ft;
+
+ eth_ft->ft = ft;
+ return eth_ft;
+}
+
+static void mask_spec(u8 *mask, u8 *val, size_t size)
+{
+ unsigned int i;
+
+ for (i = 0; i < size; i++, mask++, val++)
+ *((u8 *)val) = *((u8 *)mask) & *((u8 *)val);
+}
+
+#define MLX5E_FTE_SET(header_p, fld, v) \
+ MLX5_SET(fte_match_set_lyr_2_4, header_p, fld, v)
+
+#define MLX5E_FTE_ADDR_OF(header_p, fld) \
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, header_p, fld)
+
+static void
+set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m,
+ __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v)
+{
+ if (ip4src_m) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ip4src_v, sizeof(ip4src_v));
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ip4src_m, sizeof(ip4src_m));
+ }
+ if (ip4dst_m) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ip4dst_v, sizeof(ip4dst_v));
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ip4dst_m, sizeof(ip4dst_m));
+ }
+
+ MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
+ MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IP);
+}
+
+static void
+set_ip6(void *headers_c, void *headers_v, __be32 ip6src_m[4],
+ __be32 ip6src_v[4], __be32 ip6dst_m[4], __be32 ip6dst_v[4])
+{
+ u8 ip6_sz = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+
+ if (!ipv6_addr_any((struct in6_addr *)ip6src_m)) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ ip6src_v, ip6_sz);
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ ip6src_m, ip6_sz);
+ }
+ if (!ipv6_addr_any((struct in6_addr *)ip6dst_m)) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ ip6dst_v, ip6_sz);
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ ip6dst_m, ip6_sz);
+ }
+
+ MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
+ MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IPV6);
+}
+
+static void
+set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
+ __be16 pdst_m, __be16 pdst_v)
+{
+ if (psrc_m) {
+ MLX5E_FTE_SET(headers_c, tcp_sport, ntohs(psrc_m));
+ MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v));
+ }
+ if (pdst_m) {
+ MLX5E_FTE_SET(headers_c, tcp_dport, ntohs(pdst_m));
+ MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v));
+ }
+
+ MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff);
+ MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_TCP);
+}
+
+static void
+set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
+ __be16 pdst_m, __be16 pdst_v)
+{
+ if (psrc_m) {
+ MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_m));
+ MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v));
+ }
+
+ if (pdst_m) {
+ MLX5E_FTE_SET(headers_c, udp_dport, ntohs(pdst_m));
+ MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v));
+ }
+
+ MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff);
+ MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_UDP);
+}
+
+static void
+parse_tcp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec;
+ struct ethtool_tcpip4_spec *l4_val = &fs->h_u.tcp_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src,
+ l4_mask->ip4dst, l4_val->ip4dst);
+
+ set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_udp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.udp_ip4_spec;
+ struct ethtool_tcpip4_spec *l4_val = &fs->h_u.udp_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src,
+ l4_mask->ip4dst, l4_val->ip4dst);
+
+ set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_ip4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec;
+ struct ethtool_usrip4_spec *l3_val = &fs->h_u.usr_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l3_mask->ip4src, l3_val->ip4src,
+ l3_mask->ip4dst, l3_val->ip4dst);
+
+ if (l3_mask->proto) {
+ MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->proto);
+ MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->proto);
+ }
+}
+
+static void
+parse_ip6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec;
+ struct ethtool_usrip6_spec *l3_val = &fs->h_u.usr_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l3_mask->ip6src,
+ l3_val->ip6src, l3_mask->ip6dst, l3_val->ip6dst);
+
+ if (l3_mask->l4_proto) {
+ MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->l4_proto);
+ MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->l4_proto);
+ }
+}
+
+static void
+parse_tcp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec;
+ struct ethtool_tcpip6_spec *l4_val = &fs->h_u.tcp_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l4_mask->ip6src,
+ l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst);
+
+ set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_udp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.udp_ip6_spec;
+ struct ethtool_tcpip6_spec *l4_val = &fs->h_u.udp_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l4_mask->ip6src,
+ l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst);
+
+ set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_ether(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethhdr *eth_mask = &fs->m_u.ether_spec;
+ struct ethhdr *eth_val = &fs->h_u.ether_spec;
+
+ mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask));
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, smac_47_16), eth_mask->h_source);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, smac_47_16), eth_val->h_source);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), eth_mask->h_dest);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), eth_val->h_dest);
+ MLX5E_FTE_SET(headers_c, ethertype, ntohs(eth_mask->h_proto));
+ MLX5E_FTE_SET(headers_v, ethertype, ntohs(eth_val->h_proto));
+}
+
+static void
+set_cvlan(void *headers_c, void *headers_v, __be16 vlan_tci)
+{
+ MLX5E_FTE_SET(headers_c, cvlan_tag, 1);
+ MLX5E_FTE_SET(headers_v, cvlan_tag, 1);
+ MLX5E_FTE_SET(headers_c, first_vid, 0xfff);
+ MLX5E_FTE_SET(headers_v, first_vid, ntohs(vlan_tci));
+}
+
+static void
+set_dmac(void *headers_c, void *headers_v,
+ unsigned char m_dest[ETH_ALEN], unsigned char v_dest[ETH_ALEN])
+{
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), m_dest);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), v_dest);
+}
+
+static int set_flow_attrs(u32 *match_c, u32 *match_v,
+ struct ethtool_rx_flow_spec *fs)
+{
+ void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ u32 flow_type = flow_type_mask(fs->flow_type);
+
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ parse_tcp4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case UDP_V4_FLOW:
+ parse_udp4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case IP_USER_FLOW:
+ parse_ip4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case TCP_V6_FLOW:
+ parse_tcp6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case UDP_V6_FLOW:
+ parse_udp6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case IPV6_USER_FLOW:
+ parse_ip6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case ETHER_FLOW:
+ parse_ether(outer_headers_c, outer_headers_v, fs);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((fs->flow_type & FLOW_EXT) &&
+ (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)))
+ set_cvlan(outer_headers_c, outer_headers_v, fs->h_ext.vlan_tci);
+
+ if (fs->flow_type & FLOW_MAC_EXT &&
+ !is_zero_ether_addr(fs->m_ext.h_dest)) {
+ mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN);
+ set_dmac(outer_headers_c, outer_headers_v, fs->m_ext.h_dest,
+ fs->h_ext.h_dest);
+ }
+
+ return 0;
+}
+
+static void add_rule_to_list(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *rule)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ struct list_head *head = &ethtool->rules;
+ struct mlx5e_ethtool_rule *iter;
+
+ list_for_each_entry(iter, &ethtool->rules, list) {
+ if (iter->flow_spec.location > rule->flow_spec.location)
+ break;
+ head = &iter->list;
+ }
+ ethtool->tot_num_rules++;
+ list_add(&rule->list, head);
+}
+
+static bool outer_header_zero(u32 *match_criteria)
+{
+ int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
+ return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
+ outer_headers_c + 1,
+ size - 1);
+}
+
+static int flow_get_tirn(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule,
+ struct ethtool_rx_flow_spec *fs,
+ u32 rss_context, u32 *tirn)
+{
+ if (fs->flow_type & FLOW_RSS) {
+ struct mlx5e_packet_merge_param pkt_merge_param;
+ struct mlx5e_rss *rss;
+ u32 flow_type;
+ int err;
+ int tt;
+
+ rss = mlx5e_rx_res_rss_get(priv->rx_res, rss_context);
+ if (!rss)
+ return -ENOENT;
+
+ flow_type = flow_type_mask(fs->flow_type);
+ tt = flow_type_to_traffic_type(flow_type);
+ if (tt < 0)
+ return -EINVAL;
+
+ pkt_merge_param = priv->channels.params.packet_merge;
+ err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn);
+ if (err)
+ return err;
+ eth_rule->rss = rss;
+ mlx5e_rss_refcnt_inc(eth_rule->rss);
+ } else {
+ *tirn = mlx5e_rx_res_get_tirn_direct(priv->rx_res, fs->ring_cookie);
+ }
+
+ return 0;
+}
+
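+/* Translate an ethtool flow spec into an mlx5 rule: drop for
+ * RX_CLS_FLOW_DISC, otherwise forward to the TIR chosen by flow_get_tirn().
+ */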
+static struct mlx5_flow_handle *
+add_ethtool_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule,
+ struct mlx5_flow_table *ft,
+ struct ethtool_rx_flow_spec *fs, u32 rss_context)
+{
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND };
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+ err = set_flow_attrs(spec->match_criteria, spec->match_value,
+ fs);
+ if (err)
+ goto free;
+
+ if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ } else {
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ err = flow_get_tirn(priv, eth_rule, fs, rss_context, &dst->tir_num);
+ if (err)
+ goto free;
+
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
+ spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
+ __func__, err);
+ goto free;
+ }
+free:
+ kvfree(spec);
+ kfree(dst);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static void del_ethtool_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_ethtool_rule *eth_rule)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
+
+ if (eth_rule->rule)
+ mlx5_del_flow_rules(eth_rule->rule);
+ if (eth_rule->rss)
+ mlx5e_rss_refcnt_dec(eth_rule->rss);
+ list_del(&eth_rule->list);
+ ethtool->tot_num_rules--;
+ put_flow_table(eth_rule->eth_ft);
+ kfree(eth_rule);
+}
+
+static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv,
+ int location)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ struct mlx5e_ethtool_rule *iter;
+
+ list_for_each_entry(iter, &ethtool->rules, list) {
+ if (iter->flow_spec.location == location)
+ return iter;
+ }
+ return NULL;
+}
+
+static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv,
+ int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+
+ eth_rule = find_ethtool_rule(priv, location);
+ if (eth_rule)
+ del_ethtool_rule(priv->fs, eth_rule);
+
+ eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
+ if (!eth_rule)
+ return ERR_PTR(-ENOMEM);
+
+ add_rule_to_list(priv, eth_rule);
+ return eth_rule;
+}
+
+#define MAX_NUM_OF_ETHTOOL_RULES BIT(10)
+
+#define all_ones(field) (field == (__force typeof(field))-1)
+#define all_zeros_or_all_ones(field) \
+ ((field) == 0 || (field) == (__force typeof(field))-1)
+
+static int validate_ethter(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethhdr *eth_mask = &fs->m_u.ether_spec;
+ int ntuples = 0;
+
+ if (!is_zero_ether_addr(eth_mask->h_dest))
+ ntuples++;
+ if (!is_zero_ether_addr(eth_mask->h_source))
+ ntuples++;
+ if (eth_mask->h_proto)
+ ntuples++;
+ return ntuples;
+}
+
+static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec;
+ int ntuples = 0;
+
+ if (l4_mask->tos)
+ return -EINVAL;
+
+ if (l4_mask->ip4src)
+ ntuples++;
+ if (l4_mask->ip4dst)
+ ntuples++;
+ if (l4_mask->psrc)
+ ntuples++;
+ if (l4_mask->pdst)
+ ntuples++;
+ /* Flow is TCP/UDP */
+ return ++ntuples;
+}
+
+static int validate_ip4(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec;
+ int ntuples = 0;
+
+ if (l3_mask->l4_4_bytes || l3_mask->tos ||
+ fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4)
+ return -EINVAL;
+ if (l3_mask->ip4src)
+ ntuples++;
+ if (l3_mask->ip4dst)
+ ntuples++;
+ if (l3_mask->proto)
+ ntuples++;
+ /* Flow is IPv4 */
+ return ++ntuples;
+}
+
+static int validate_ip6(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec;
+ int ntuples = 0;
+
+ if (l3_mask->l4_4_bytes || l3_mask->tclass)
+ return -EINVAL;
+ if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6src))
+ ntuples++;
+
+ if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6dst))
+ ntuples++;
+ if (l3_mask->l4_proto)
+ ntuples++;
+ /* Flow is IPv6 */
+ return ++ntuples;
+}
+
+static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec;
+ int ntuples = 0;
+
+ if (l4_mask->tclass)
+ return -EINVAL;
+
+ if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6src))
+ ntuples++;
+
+ if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst))
+ ntuples++;
+
+ if (l4_mask->psrc)
+ ntuples++;
+ if (l4_mask->pdst)
+ ntuples++;
+ /* Flow is TCP/UDP */
+ return ++ntuples;
+}
+
+static int validate_vlan(struct ethtool_rx_flow_spec *fs)
+{
+ if (fs->m_ext.vlan_etype ||
+ fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))
+ return -EINVAL;
+
+ if (fs->m_ext.vlan_tci &&
+ (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID))
+ return -EINVAL;
+
+ return 1;
+}
+
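+/* Validate the ethtool flow spec and return the number of match tuples it
+ * carries; the tuple count later selects the destination flow table.
+ */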
+static int validate_flow(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs)
+{
+ int num_tuples = 0;
+ int ret = 0;
+
+ if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -ENOSPC;
+
+ if (fs->ring_cookie != RX_CLS_FLOW_DISC)
+ if (fs->ring_cookie >= priv->channels.params.num_channels)
+ return -EINVAL;
+
+ switch (flow_type_mask(fs->flow_type)) {
+ case ETHER_FLOW:
+ num_tuples += validate_ethter(fs);
+ break;
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ ret = validate_tcpudp4(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case IP_USER_FLOW:
+ ret = validate_ip4(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ ret = validate_tcpudp6(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case IPV6_USER_FLOW:
+ ret = validate_ip6(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+ if ((fs->flow_type & FLOW_EXT)) {
+ ret = validate_vlan(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ }
+
+ if (fs->flow_type & FLOW_MAC_EXT &&
+ !is_zero_ether_addr(fs->m_ext.h_dest))
+ num_tuples++;
+
+ return num_tuples;
+}
+
+static int
+mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs, u32 rss_context)
+{
+ struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5e_ethtool_rule *eth_rule;
+ struct mlx5_flow_handle *rule;
+ int num_tuples;
+ int err;
+
+ num_tuples = validate_flow(priv, fs);
+ if (num_tuples <= 0) {
+ netdev_warn(priv->netdev, "%s: flow is not valid %d\n",
+ __func__, num_tuples);
+ return num_tuples;
+ }
+
+ eth_ft = get_flow_table(priv, fs, num_tuples);
+ if (IS_ERR(eth_ft))
+ return PTR_ERR(eth_ft);
+
+ eth_rule = get_ethtool_rule(priv, fs->location);
+ if (IS_ERR(eth_rule)) {
+ put_flow_table(eth_ft);
+ return PTR_ERR(eth_rule);
+ }
+
+ eth_rule->flow_spec = *fs;
+ eth_rule->eth_ft = eth_ft;
+
+ rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto del_ethtool_rule;
+ }
+
+ eth_rule->rule = rule;
+
+ return 0;
+
+del_ethtool_rule:
+ del_ethtool_rule(priv->fs, eth_rule);
+
+ return err;
+}
+
+static int
+mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+ int err = 0;
+
+ if (location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -ENOSPC;
+
+ eth_rule = find_ethtool_rule(priv, location);
+ if (!eth_rule) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ del_ethtool_rule(priv->fs, eth_rule);
+out:
+ return err;
+}
+
+static int
+mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, int location)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ struct mlx5e_ethtool_rule *eth_rule;
+
+ if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -EINVAL;
+
+ list_for_each_entry(eth_rule, &ethtool->rules, list) {
+ int index;
+
+ if (eth_rule->flow_spec.location != location)
+ continue;
+ if (!info)
+ return 0;
+ info->fs = eth_rule->flow_spec;
+ if (!eth_rule->rss)
+ return 0;
+ index = mlx5e_rx_res_rss_index(priv->rx_res, eth_rule->rss);
+ if (index < 0)
+ return index;
+ info->rss_context = index;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int
+mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ int location = 0;
+ int idx = 0;
+ int err = 0;
+
+ info->data = MAX_NUM_OF_ETHTOOL_RULES;
+ while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
+ err = mlx5e_ethtool_get_flow(priv, NULL, location);
+ if (!err)
+ rule_locs[idx++] = location;
+ location++;
+ }
+ return err;
+}
+
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{
+ *ethtool = kvzalloc(sizeof(**ethtool), GFP_KERNEL);
+ if (!*ethtool)
+ return -ENOMEM;
+ return 0;
+}
+
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool)
+{
+ kvfree(ethtool);
+}
+
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
+ struct mlx5e_ethtool_rule *iter;
+ struct mlx5e_ethtool_rule *temp;
+
+ list_for_each_entry_safe(iter, temp, &ethtool->rules, list)
+ del_ethtool_rule(fs, iter);
+}
+
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
+
+ INIT_LIST_HEAD(&ethtool->rules);
+}
+
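+/* Map an ethtool flow type to the corresponding mlx5 TTC traffic type. */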
+static int flow_type_to_traffic_type(u32 flow_type)
+{
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ return MLX5_TT_IPV4_TCP;
+ case TCP_V6_FLOW:
+ return MLX5_TT_IPV6_TCP;
+ case UDP_V4_FLOW:
+ return MLX5_TT_IPV4_UDP;
+ case UDP_V6_FLOW:
+ return MLX5_TT_IPV6_UDP;
+ case AH_V4_FLOW:
+ return MLX5_TT_IPV4_IPSEC_AH;
+ case AH_V6_FLOW:
+ return MLX5_TT_IPV6_IPSEC_AH;
+ case ESP_V4_FLOW:
+ return MLX5_TT_IPV4_IPSEC_ESP;
+ case ESP_V6_FLOW:
+ return MLX5_TT_IPV6_IPSEC_ESP;
+ case IPV4_FLOW:
+ return MLX5_TT_IPV4;
+ case IPV6_FLOW:
+ return MLX5_TT_IPV6;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ u8 rx_hash_field = 0;
+ int err;
+ int tt;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt < 0)
+ return tt;
+
+ /* RSS does not support anything other than hashing to queues
+ * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
+ * port.
+ */
+ if (nfc->flow_type != TCP_V4_FLOW &&
+ nfc->flow_type != TCP_V6_FLOW &&
+ nfc->flow_type != UDP_V4_FLOW &&
+ nfc->flow_type != UDP_V6_FLOW)
+ return -EOPNOTSUPP;
+
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EOPNOTSUPP;
+
+ if (nfc->data & RXH_IP_SRC)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP;
+ if (nfc->data & RXH_IP_DST)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP;
+ if (nfc->data & RXH_L4_B_0_1)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT;
+ if (nfc->data & RXH_L4_B_2_3)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ u32 hash_field = 0;
+ int tt;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt < 0)
+ return tt;
+
+ hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt);
+ nfc->data = 0;
+
+ if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
+ nfc->data |= RXH_IP_SRC;
+ if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP)
+ nfc->data |= RXH_IP_DST;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT)
+ nfc->data |= RXH_L4_B_0_1;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT)
+ nfc->data |= RXH_L4_B_2_3;
+
+ return 0;
+}
+
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
+{
+ int err = 0;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ err = mlx5e_ethtool_flow_replace(priv, &cmd->fs, cmd->rss_context);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
+ break;
+ case ETHTOOL_SRXFH:
+ err = mlx5e_set_rss_hash_opt(priv, cmd);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
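+
+/* Example user-space triggers (hypothetical device name and values, for
+ * illustration only; ntuple filtering must be enabled on the netdev):
+ *
+ *   ethtool -N eth0 flow-type tcp4 dst-port 80 action 2  -> ETHTOOL_SRXCLSRLINS
+ *   ethtool -n eth0                                      -> ETHTOOL_GRXCLSRLCNT/GRXCLSRLALL
+ *   ethtool -N eth0 delete <loc>                         -> ETHTOOL_SRXCLSRLDEL
+ */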
+
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ int err = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXCLSRLCNT:
+ info->rule_cnt = ethtool->tot_num_rules;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
+ break;
+ case ETHTOOL_GRXFH:
+ err = mlx5e_get_rss_hash_opt(priv, info);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
new file mode 100644
index 000000000..9910a0480
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -0,0 +1,6021 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_gact.h>
+#include <linux/mlx5/fs.h>
+#include <net/vxlan.h>
+#include <net/geneve.h>
+#include <linux/bpf.h>
+#include <linux/if_bridge.h>
+#include <linux/filter.h>
+#include <net/page_pool.h>
+#include <net/xdp_sock_drv.h>
+#include "eswitch.h"
+#include "en.h"
+#include "en/txrx.h"
+#include "en_tc.h"
+#include "en_rep.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/macsec.h"
+#include "en_accel/en_accel.h"
+#include "en_accel/ktls.h"
+#include "lib/vxlan.h"
+#include "lib/clock.h"
+#include "en/port.h"
+#include "en/xdp.h"
+#include "lib/eq.h"
+#include "en/monitor_stats.h"
+#include "en/health.h"
+#include "en/params.h"
+#include "en/xsk/pool.h"
+#include "en/xsk/setup.h"
+#include "en/xsk/rx.h"
+#include "en/xsk/tx.h"
+#include "en/hv_vhca_stats.h"
+#include "en/devlink.h"
+#include "lib/mlx5.h"
+#include "en/ptp.h"
+#include "en/htb.h"
+#include "qos.h"
+#include "en/trap.h"
+
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u16 umr_wqebbs, max_wqebbs;
+ bool striding_rq_umr;
+
+ striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+ MLX5_CAP_ETH(mdev, reg_umr_sq);
+ if (!striding_rq_umr)
+ return false;
+
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+ max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+ /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is
+ * calculated from mlx5e_get_max_sq_aligned_wqebbs.
+ */
+ if (WARN_ON(umr_wqebbs > max_wqebbs))
+ return false;
+
+ return true;
+}
+
+void mlx5e_update_carrier(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 port_state;
+ bool up;
+
+ port_state = mlx5_query_vport_state(mdev,
+ MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
+ 0);
+
+ up = port_state == VPORT_STATE_UP;
+ if (up == netif_carrier_ok(priv->netdev))
+ netif_carrier_event(priv->netdev);
+ if (up) {
+ netdev_info(priv->netdev, "Link up\n");
+ netif_carrier_on(priv->netdev);
+ } else {
+ netdev_info(priv->netdev, "Link down\n");
+ netif_carrier_off(priv->netdev);
+ }
+}
+
+static void mlx5e_update_carrier_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_carrier_work);
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ if (priv->profile->update_carrier)
+ priv->profile->update_carrier(priv);
+ mutex_unlock(&priv->state_lock);
+}
+
+static void mlx5e_update_stats_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_stats_work);
+
+ mutex_lock(&priv->state_lock);
+ priv->profile->update_stats(priv);
+ mutex_unlock(&priv->state_lock);
+}
+
+void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
+{
+ if (!priv->profile->update_stats)
+ return;
+
+ if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state)))
+ return;
+
+ queue_work(priv->wq, &priv->update_stats_work);
+}
+
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+ struct mlx5_eqe *eqe = data;
+
+ if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ return NOTIFY_DONE;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
+{
+ priv->events_nb.notifier_call = async_event;
+ mlx5_notifier_register(priv->mdev, &priv->events_nb);
+}
+
+static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
+{
+ mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
+}
+
+static int blocking_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb);
+ int err;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_TYPE_TRAP:
+ err = mlx5e_handle_trap_event(priv, data);
+ break;
+ default:
+ netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event);
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv)
+{
+ priv->blocking_events_nb.notifier_call = blocking_event;
+ mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb);
+}
+
+static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
+{
+ mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb);
+}
+
+static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u32 sz;
+
+ sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
+
+ return sz / MLX5_OCTWORD;
+}
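+
+/* Worked example (sizes assumed for illustration): in the aligned MTT mode an
+ * entry is a struct mlx5_mtt of 8 bytes, so 64 pages take 64 * 8 = 512 bytes;
+ * ALIGN(512, MLX5_UMR_MTT_ALIGNMENT) is still 512, and 512 / MLX5_OCTWORD
+ * (16 bytes) gives 32 octowords.
+ */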
+
+static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_icosq *sq,
+ struct mlx5e_umr_wqe *wqe)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+ u16 octowords;
+ u8 ds_cnt;
+
+ ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode),
+ MLX5_SEND_WQE_DS);
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ ds_cnt);
+ cseg->umr_mkey = rq->mpwqe.umr_mkey_be;
+
+ ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
+ octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode);
+ ucseg->xlt_octowords = cpu_to_be16(octowords);
+ ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+}
+
+static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node)
+{
+ rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
+ GFP_KERNEL, node);
+ if (!rq->mpwqe.shampo)
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq)
+{
+ kvfree(rq->mpwqe.shampo);
+}
+
+static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+
+ shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
+ node);
+ if (!shampo->bitmap)
+ return -ENOMEM;
+
+ shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
+ sizeof(*shampo->info)),
+ GFP_KERNEL, node);
+ if (!shampo->info) {
+ kvfree(shampo->bitmap);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
+{
+ kvfree(rq->mpwqe.shampo->bitmap);
+ kvfree(rq->mpwqe.shampo->info);
+}
+
+static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
+{
+ int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ size_t alloc_size;
+
+ alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units,
+ rq->mpwqe.pages_per_wqe));
+
+ rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
+ if (!rq->mpwqe.info)
+ return -ENOMEM;
+
+ mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
+
+ return 0;
+}
+
+static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5_MKC_ACCESS_MODE_MTT;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return MLX5_MKC_ACCESS_MODE_KSM;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return MLX5_MKC_ACCESS_MODE_KLMS;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return MLX5_MKC_ACCESS_MODE_KSM;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
+ u32 npages, u8 page_shift, u32 *umr_mkey,
+ dma_addr_t filler_addr,
+ enum mlx5e_mpwrq_umr_mode umr_mode,
+ u32 xsk_chunk_size)
+{
+ struct mlx5_mtt *mtt;
+ struct mlx5_ksm *ksm;
+ struct mlx5_klm *klm;
+ u32 octwords;
+ int inlen;
+ void *mkc;
+ u32 *in;
+ int err;
+ int i;
+
+ if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED ||
+ umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) &&
+ !MLX5_CAP_GEN(mdev, fixed_buffer_size)) {
+ mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n");
+ return -EINVAL;
+ }
+
+ octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode);
+
+ inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in),
+ MLX5_OCTWORD, octwords);
+ if (inlen < 0)
+ return inlen;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode));
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET64(mkc, mkc, len, npages << page_shift);
+ MLX5_SET(mkc, mkc, translations_octword_size, octwords);
+ if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)
+ MLX5_SET(mkc, mkc, log_page_size, page_shift - 2);
+ else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED)
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords);
+
+ /* Initialize the mkey with all MTTs pointing to a default
+ * page (filler_addr). When the channels are activated, UMR
+ * WQEs will redirect the RX WQEs to the actual memory from
+ * the RQ's pool, while the gaps (wqe_overflow) remain mapped
+ * to the default page.
+ */
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++) {
+ klm[i << 1] = (struct mlx5_klm) {
+ .va = cpu_to_be64(filler_addr),
+ .bcount = cpu_to_be32(xsk_chunk_size),
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ };
+ klm[(i << 1) + 1] = (struct mlx5_klm) {
+ .va = cpu_to_be64(filler_addr),
+ .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size),
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ };
+ }
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++)
+ ksm[i] = (struct mlx5_ksm) {
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ .va = cpu_to_be64(filler_addr),
+ };
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages; i++)
+ mtt[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(filler_addr),
+ };
+ break;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0; i < npages * 4; i++) {
+ ksm[i] = (struct mlx5_ksm) {
+ .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
+ .va = cpu_to_be64(filler_addr),
+ };
+ }
+ break;
+ }
+
+ err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
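+
+/* Layout sketch for the OVERSIZED case (sizes assumed for illustration): with
+ * page_shift = 12 and xsk_chunk_size = 3072, each page slot is described by a
+ * KLM pair of 3072 + 1024 bytes, both pointing at filler_addr until UMR WQEs
+ * remap them to real buffers.
+ */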
+
+static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
+ u64 nentries,
+ u32 *umr_mkey)
+{
+ int inlen;
+ void *mkc;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(mkc, mkc, translations_octword_size, nentries);
+ MLX5_SET(mkc, mkc, length64, 1);
+ err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
+{
+ u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0;
+ u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ u32 num_entries, max_num_entries;
+ u32 umr_mkey;
+ int err;
+
+ max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode);
+
+ /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. */
+ if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe,
+ &num_entries) ||
+ num_entries > max_num_entries))
+ mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n",
+ __func__, wq_size, rq->mpwqe.mtts_per_wqe,
+ max_num_entries);
+
+ err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift,
+ &umr_mkey, rq->wqe_overflow.addr,
+ rq->mpwqe.umr_mode, xsk_chunk_size);
+ rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey);
+ return err;
+}
+
+static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq *rq)
+{
+ u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
+ mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
+ max_klm_size, rq->mpwqe.shampo->hd_per_wq);
+ return -EINVAL;
+ }
+ return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+ &rq->mpwqe.shampo->mkey);
+}
+
+static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
+{
+ struct mlx5e_wqe_frag_info next_frag = {};
+ struct mlx5e_wqe_frag_info *prev = NULL;
+ int i;
+
+ if (rq->xsk_pool) {
+ /* Assumptions used by XSK batched allocator. */
+ WARN_ON(rq->wqe.info.num_frags != 1);
+ WARN_ON(rq->wqe.info.log_num_frags != 0);
+ WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
+ }
+
+ next_frag.au = &rq->wqe.alloc_units[0];
+
+ for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
+ struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+ struct mlx5e_wqe_frag_info *frag =
+ &rq->wqe.frags[i << rq->wqe.info.log_num_frags];
+ int f;
+
+ for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
+ if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
+ next_frag.au++;
+ next_frag.offset = 0;
+ if (prev)
+ prev->last_in_page = true;
+ }
+ *frag = next_frag;
+
+ /* prepare next */
+ next_frag.offset += frag_info[f].frag_stride;
+ prev = frag;
+ }
+ }
+
+ if (prev)
+ prev->last_in_page = true;
+}
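+
+/* Worked example (sizes assumed for illustration): with num_frags = 1,
+ * frag_stride = 2048 and PAGE_SIZE = 4096, WQEs 0 and 1 land at offsets 0 and
+ * 2048 of alloc_units[0]; WQE 2 would overflow the page, so it starts
+ * alloc_units[1] at offset 0 and WQE 1's fragment is marked last_in_page.
+ */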
+
+static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node)
+{
+ int len = wq_sz << rq->wqe.info.log_num_frags;
+
+ rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)),
+ GFP_KERNEL, node);
+ if (!rq->wqe.alloc_units)
+ return -ENOMEM;
+
+ mlx5e_init_frags_partition(rq);
+
+ return 0;
+}
+
+static void mlx5e_free_au_list(struct mlx5e_rq *rq)
+{
+ kvfree(rq->wqe.alloc_units);
+}
+
+static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work);
+
+ mlx5e_reporter_rq_cqe_err(rq);
+}
+
+static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+ rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
+ if (!rq->wqe_overflow.page)
+ return -ENOMEM;
+
+ rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
+ PAGE_SIZE, rq->buff.map_dir);
+ if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
+ __free_page(rq->wqe_overflow.page);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+ dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
+ rq->buff.map_dir);
+ __free_page(rq->wqe_overflow.page);
+}
+
+static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ int err;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = c->netdev;
+ rq->priv = c->priv;
+ rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
+ rq->icosq = &c->icosq;
+ rq->ix = c->ix;
+ rq->channel = c;
+ rq->mdev = mdev;
+ rq->hw_mtu =
+ MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en;
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->stats = &c->priv->channel_stats[c->ix]->rq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ err = mlx5e_rq_set_handlers(rq, params, NULL);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
+}
+
+static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq *rq,
+ u32 *pool_size,
+ int node)
+{
+ void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
+ int wq_size;
+ int err;
+
+ if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ return 0;
+ err = mlx5e_rq_shampo_hd_alloc(rq, node);
+ if (err)
+ goto out;
+ rq->mpwqe.shampo->hd_per_wq =
+ mlx5e_shampo_hd_per_wq(mdev, params, rqp);
+ err = mlx5e_create_rq_hd_umr_mkey(mdev, rq);
+ if (err)
+ goto err_shampo_hd;
+ err = mlx5e_rq_shampo_hd_info_alloc(rq, node);
+ if (err)
+ goto err_shampo_info;
+ rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
+ if (!rq->hw_gro_data) {
+ err = -ENOMEM;
+ goto err_hw_gro_data;
+ }
+ rq->mpwqe.shampo->key =
+ cpu_to_be32(rq->mpwqe.shampo->mkey);
+ rq->mpwqe.shampo->hd_per_wqe =
+ mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
+ wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
+ MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
+ return 0;
+
+err_hw_gro_data:
+ mlx5e_rq_shampo_hd_info_free(rq);
+err_shampo_info:
+ mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey);
+err_shampo_hd:
+ mlx5e_rq_shampo_hd_free(rq);
+out:
+ return err;
+}
+
+static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
+{
+ if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ return;
+
+ kvfree(rq->hw_gro_data);
+ mlx5e_rq_shampo_hd_info_free(rq);
+ mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey);
+ mlx5e_rq_shampo_hd_free(rq);
+}
+
+static int mlx5e_alloc_rq(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *rqp,
+ int node, struct mlx5e_rq *rq)
+{
+ struct page_pool_params pp_params = { 0 };
+ struct mlx5_core_dev *mdev = rq->mdev;
+ void *rqc = rqp->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ u32 pool_size;
+ int wq_sz;
+ int err;
+ int i;
+
+ rqp->wq.db_numa_node = node;
+ INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
+
+ if (params->xdp_prog)
+ bpf_prog_inc(params->xdp_prog);
+ RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
+
+ rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+ rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
+ pool_size = 1 << params->log_rq_mtu_frames;
+
+ rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_rq_xdp_prog;
+
+ err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
+ if (err)
+ goto err_rq_wq_destroy;
+
+ rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
+
+ wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+
+ rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ rq->mpwqe.pages_per_wqe =
+ mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+ rq->mpwqe.umr_wqebbs =
+ mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+ rq->mpwqe.mtts_per_wqe =
+ mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift,
+ rq->mpwqe.umr_mode);
+
+ pool_size = rq->mpwqe.pages_per_wqe <<
+ mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
+
+ rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ rq->mpwqe.num_strides =
+ BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz);
+
+ rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
+
+ err = mlx5e_create_rq_umr_mkey(mdev, rq);
+ if (err)
+ goto err_rq_drop_page;
+
+ err = mlx5e_rq_alloc_mpwqe_info(rq, node);
+ if (err)
+ goto err_rq_mkey;
+
+ err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node);
+ if (err)
+ goto err_free_mpwqe_info;
+
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_rq_xdp_prog;
+
+ rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
+
+ wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
+
+ rq->wqe.info = rqp->frags_info;
+ rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
+
+ rq->wqe.frags =
+ kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
+ (wq_sz << rq->wqe.info.log_num_frags)),
+ GFP_KERNEL, node);
+ if (!rq->wqe.frags) {
+ err = -ENOMEM;
+ goto err_rq_wq_destroy;
+ }
+
+ err = mlx5e_init_au_list(rq, wq_sz, node);
+ if (err)
+ goto err_rq_frags;
+ }
+
+ if (xsk) {
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_XSK_BUFF_POOL, NULL);
+ xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
+ } else {
+ /* Create a page_pool and register it with rxq */
+ pp_params.order = 0;
+ pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.pool_size = pool_size;
+ pp_params.nid = node;
+ pp_params.dev = rq->pdev;
+ pp_params.dma_dir = rq->buff.map_dir;
+
+ /* The page_pool can be used even when there is no rq->xdp_prog:
+ * since page_pool does not handle the DMA mapping here, there is
+ * no state that needs to be cleared, and page_pool gracefully
+ * handles elevated refcounts.
+ */
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ err = PTR_ERR(rq->page_pool);
+ rq->page_pool = NULL;
+ goto err_free_by_rq_type;
+ }
+ if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, rq->page_pool);
+ }
+ if (err)
+ goto err_destroy_page_pool;
+
+ for (i = 0; i < wq_sz; i++) {
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ struct mlx5e_rx_wqe_ll *wqe =
+ mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
+ u32 byte_count =
+ rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
+ u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) <<
+ rq->mpwqe.page_shift;
+ u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ?
+ 0 : rq->buff.headroom;
+
+ wqe->data[0].addr = cpu_to_be64(dma_offset + headroom);
+ wqe->data[0].byte_count = cpu_to_be32(byte_count);
+ wqe->data[0].lkey = rq->mpwqe.umr_mkey_be;
+ } else {
+ struct mlx5e_rx_wqe_cyc *wqe =
+ mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
+ int f;
+
+ for (f = 0; f < rq->wqe.info.num_frags; f++) {
+ u32 frag_size = rq->wqe.info.arr[f].frag_size |
+ MLX5_HW_START_PADDING;
+
+ wqe->data[f].byte_count = cpu_to_be32(frag_size);
+ wqe->data[f].lkey = rq->mkey_be;
+ }
+ /* Check whether num_frags is not a power of two */
+ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
+ wqe->data[f].byte_count = 0;
+ wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ wqe->data[f].addr = 0;
+ }
+ }
+ }
+
+ INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work);
+
+ switch (params->rx_cq_moderation.cq_period_mode) {
+ case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
+ rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+ break;
+ case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
+ default:
+ rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ }
+
+ rq->page_cache.head = 0;
+ rq->page_cache.tail = 0;
+
+ return 0;
+
+err_destroy_page_pool:
+ page_pool_destroy(rq->page_pool);
+err_free_by_rq_type:
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ mlx5e_rq_free_shampo(rq);
+err_free_mpwqe_info:
+ kvfree(rq->mpwqe.info);
+err_rq_mkey:
+ mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
+err_rq_drop_page:
+ mlx5e_free_mpwqe_rq_drop_page(rq);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ mlx5e_free_au_list(rq);
+err_rq_frags:
+ kvfree(rq->wqe.frags);
+ }
+err_rq_wq_destroy:
+ mlx5_wq_destroy(&rq->wq_ctrl);
+err_rq_xdp_prog:
+ if (params->xdp_prog)
+ bpf_prog_put(params->xdp_prog);
+
+ return err;
+}
+
+static void mlx5e_free_rq(struct mlx5e_rq *rq)
+{
+ struct bpf_prog *old_prog;
+ int i;
+
+ if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
+ old_prog = rcu_dereference_protected(rq->xdp_prog,
+ lockdep_is_held(&rq->priv->state_lock));
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ kvfree(rq->mpwqe.info);
+ mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
+ mlx5e_free_mpwqe_rq_drop_page(rq);
+ mlx5e_rq_free_shampo(rq);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ kvfree(rq->wqe.frags);
+ mlx5e_free_au_list(rq);
+ }
+
+ for (i = rq->page_cache.head; i != rq->page_cache.tail;
+ i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
+ /* With AF_XDP, page_cache is not used, so this loop is not
+ * entered, and it's safe to call mlx5e_page_release_dynamic
+ * directly.
+ */
+ mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false);
+ }
+
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ page_pool_destroy(rq->page_pool);
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
+int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+ u8 ts_format;
+ void *in;
+ void *rqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * rq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ ts_format = mlx5_is_real_time_rq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ memcpy(rqc, param->rqc, sizeof(param->rqc));
+
+ MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
+ MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ MLX5_SET(wq, wq, log_headers_buffer_entry_num,
+ order_base_2(rq->mpwqe.shampo->hd_per_wq));
+ MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey);
+ }
+
+ mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY)
+ mlx5e_rqwq_reset(rq);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+{
+ struct net_device *dev = rq->netdev;
+ int err;
+
+ err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
+ if (err) {
+ netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
+ return err;
+ }
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err) {
+ netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
+{
+ mlx5e_free_rx_descs(rq);
+
+ return mlx5e_rq_to_ready(rq, curr_state);
+}
+
+static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+ MLX5_SET(rqc, rqc, vsd, vsd);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in);
+
+ kvfree(in);
+
+ return err;
+}
+
+void mlx5e_destroy_rq(struct mlx5e_rq *rq)
+{
+ mlx5_core_destroy_rq(rq->mdev, rq->rqn);
+}
+
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
+{
+ unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
+
+ u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
+
+ do {
+ if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes)
+ return 0;
+
+ msleep(20);
+ } while (time_before(jiffies, exp_time));
+
+ netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
+ rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
+
+ mlx5e_reporter_rx_timeout(rq);
+ return -ETIMEDOUT;
+}
+
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq;
+ u16 head;
+ int i;
+
+ if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return;
+
+ wq = &rq->mpwqe.wq;
+ head = wq->head;
+
+ /* Outstanding UMR WQEs (in progress) start at wq->head */
+ for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
+ rq->dealloc_wqe(rq, head);
+ head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+ }
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ u16 len;
+
+ len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) &
+ (rq->mpwqe.shampo->hd_per_wq - 1);
+ mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false);
+ rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci;
+ }
+
+ rq->mpwqe.actual_wq_head = wq->head;
+ rq->mpwqe.umr_in_progress = 0;
+ rq->mpwqe.umr_completed = 0;
+}
+
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+ __be16 wqe_ix_be;
+ u16 wqe_ix;
+
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+
+ mlx5e_free_rx_in_progress_descs(rq);
+
+ while (!mlx5_wq_ll_is_empty(wq)) {
+ struct mlx5e_rx_wqe_ll *wqe;
+
+ wqe_ix_be = *wq->tail_next;
+ wqe_ix = be16_to_cpu(wqe_ix_be);
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix);
+ rq->dealloc_wqe(rq, wqe_ix);
+ mlx5_wq_ll_pop(wq, wqe_ix_be,
+ &wqe->next.next_wqe_index);
+ }
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq,
+ 0, true);
+ } else {
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+
+ while (!mlx5_wq_cyc_is_empty(wq)) {
+ wqe_ix = mlx5_wq_cyc_get_tail(wq);
+ rq->dealloc_wqe(rq, wqe_ix);
+ mlx5_wq_cyc_pop(wq);
+ }
+ }
+}
+
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+ struct mlx5e_xsk_param *xsk, int node,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+ int err;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
+
+ err = mlx5e_alloc_rq(params, xsk, param, node, rq);
+ if (err)
+ return err;
+
+ err = mlx5e_create_rq(rq, param);
+ if (err)
+ goto err_free_rq;
+
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ goto err_destroy_rq;
+
+ if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
+ __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
+
+ if (params->rx_dim_enabled)
+ __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
+
+ /* We disable csum_complete when XDP is enabled, since XDP
+ * programs might manipulate packets, which would render
+ * skb->checksum incorrect.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
+
+ /* For CQE compression on striding RQ, use stride index provided by
+ * HW if capability is supported.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
+ MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
+ __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
+
+ return 0;
+
+err_destroy_rq:
+ mlx5e_destroy_rq(rq);
+err_free_rq:
+ mlx5e_free_rq(rq);
+
+ return err;
+}
+
+void mlx5e_activate_rq(struct mlx5e_rq *rq)
+{
+ set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+}
+
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+{
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+}
+
+void mlx5e_close_rq(struct mlx5e_rq *rq)
+{
+ cancel_work_sync(&rq->dim.work);
+ cancel_work_sync(&rq->recover_work);
+ mlx5e_destroy_rq(rq);
+ mlx5e_free_rx_descs(rq);
+ mlx5e_free_rq(rq);
+}
+
+static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
+{
+ kvfree(sq->db.xdpi_fifo.xi);
+ kvfree(sq->db.wqe_info);
+}
+
+static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
+{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+ size_t size;
+
+ size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
+ xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
+ if (!xdpi_fifo->xi)
+ return -ENOMEM;
+
+ xdpi_fifo->pc = &sq->xdpi_fifo_pc;
+ xdpi_fifo->cc = &sq->xdpi_fifo_cc;
+ xdpi_fifo->mask = dsegs_per_wq - 1;
+
+ return 0;
+}
+
+static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ size_t size;
+ int err;
+
+ size = array_size(sizeof(*sq->db.wqe_info), wq_sz);
+ sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
+ if (!sq->db.wqe_info)
+ return -ENOMEM;
+
+ err = mlx5e_alloc_xdpsq_fifo(sq, numa);
+ if (err) {
+ mlx5e_free_xdpsq_db(sq);
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct xsk_buff_pool *xsk_pool,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_xdpsq *sq,
+ bool is_redirect)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->pdev = c->pdev;
+ sq->mkey_be = c->mkey_be;
+ sq->channel = c;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
+ sq->xsk_pool = xsk_pool;
+
+ sq->stats = sq->xsk_pool ?
+ &c->priv->channel_stats[c->ix]->xsksq :
+ is_redirect ?
+ &c->priv->channel_stats[c->ix]->xdpsq :
+ &c->priv->channel_stats[c->ix]->rq_xdpsq;
+ sq->stop_room = param->is_mpw ? mlx5e_stop_room_for_mpwqe(mdev) :
+ mlx5e_stop_room_for_max_wqe(mdev);
+ sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
+{
+ mlx5e_free_xdpsq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
+{
+ kvfree(sq->db.wqe_info);
+}
+
+static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ size_t size;
+
+ size = array_size(wq_sz, sizeof(*sq->db.wqe_info));
+ sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
+ if (!sq->db.wqe_info)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+ recover_work);
+
+ mlx5e_reporter_icosq_cqe_err(sq);
+}
+
+static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+ recover_work);
+
+ /* Not implemented yet. */
+
+ netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
+}
+
+static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->channel = c;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->reserved_room = param->stop_room;
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ INIT_WORK(&sq->recover_work, recover_work_func);
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
+{
+ mlx5e_free_icosq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
+{
+ kvfree(sq->db.wqe_info);
+ kvfree(sq->db.skb_fifo.fifo);
+ kvfree(sq->db.dma_fifo);
+}
+
+int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+ sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.dma_fifo)),
+ GFP_KERNEL, numa);
+ sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.skb_fifo.fifo)),
+ GFP_KERNEL, numa);
+ sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
+ sizeof(*sq->db.wqe_info)),
+ GFP_KERNEL, numa);
+ if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) {
+ mlx5e_free_txqsq_db(sq);
+ return -ENOMEM;
+ }
+
+ sq->dma_fifo_mask = df_sz - 1;
+
+ sq->db.skb_fifo.pc = &sq->skb_fifo_pc;
+ sq->db.skb_fifo.cc = &sq->skb_fifo_cc;
+ sq->db.skb_fifo.mask = df_sz - 1;
+
+ return 0;
+}
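+
+/* Sizing sketch (WQ size assumed for illustration): MLX5_SEND_WQEBB_NUM_DS is
+ * 64 B / 16 B = 4 data segments per WQEBB, so a 1024-entry SQ gets
+ * df_sz = 4096 slots in both the dma_fifo and the skb_fifo, each with a mask
+ * of 4095.
+ */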
+
+static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
+ int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq,
+ int tc)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->pdev = c->pdev;
+ sq->clock = &mdev->clock;
+ sq->mkey_be = c->mkey_be;
+ sq->netdev = c->netdev;
+ sq->mdev = c->mdev;
+ sq->priv = c->priv;
+ sq->ch_ix = c->ix;
+ sq->txq_ix = txq_ix;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+ INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+ if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
+ if (mlx5_ipsec_device_caps(c->priv->mdev))
+ set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+ if (param->is_mpw)
+ set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
+ sq->stop_room = param->stop_room;
+ sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
+ sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
+{
+ mlx5e_free_txqsq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u32 *sqn)
+{
+ u8 ts_format;
+ void *in;
+ void *sqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * csp->wq_ctrl->buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ ts_format = mlx5_is_real_time_sq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, param->sqc, sizeof(param->sqc));
+ MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz);
+ MLX5_SET(sqc, sqc, tis_num_0, csp->tisn);
+ MLX5_SET(sqc, sqc, cqn, csp->cqn);
+ MLX5_SET(sqc, sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn);
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
+
+ if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+ MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode);
+
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma);
+
+ mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, sqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p)
+{
+ u64 bitmask = 0;
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
+ MLX5_SET(sqc, sqc, state, p->next_state);
+ if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
+ bitmask |= 1;
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
+ }
+ if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) {
+ bitmask |= 1 << 2;
+ MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id);
+ }
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask);
+
+ err = mlx5_core_modify_sq(mdev, sqn, in);
+
+ kvfree(in);
+
+ return err;
+}
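+
+/* Note on the modify bitmask (derived from the code above): bit 0 selects the
+ * packet pacing rate limit index and bit 2 selects the QoS queue group id,
+ * e.g. a rate-limit-only update sends modify_bitmask = 0x1.
+ */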
+
+static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ mlx5_core_destroy_sq(mdev, sqn);
+}
+
+int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u16 qos_queue_group_id,
+ u32 *sqn)
+{
+ struct mlx5e_modify_sq_param msp = {0};
+ int err;
+
+ err = mlx5e_create_sq(mdev, param, csp, sqn);
+ if (err)
+ return err;
+
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+ if (qos_queue_group_id) {
+ msp.qos_update = true;
+ msp.qos_queue_group_id = qos_queue_group_id;
+ }
+ err = mlx5e_modify_sq(mdev, *sqn, &msp);
+ if (err)
+ mlx5e_destroy_sq(mdev, *sqn);
+
+ return err;
+}
+
+static int mlx5e_set_sq_maxrate(struct net_device *dev,
+ struct mlx5e_txqsq *sq, u32 rate);
+
+int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
+ struct mlx5e_params *params, struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id,
+ struct mlx5e_sq_stats *sq_stats)
+{
+ struct mlx5e_create_sq_param csp = {};
+ u32 tx_rate;
+ int err;
+
+ err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
+ if (err)
+ return err;
+
+ sq->stats = sq_stats;
+
+ csp.tisn = tisn;
+ csp.tis_lst_sz = 1;
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = sq->min_inline_mode;
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn);
+ if (err)
+ goto err_free_txqsq;
+
+ tx_rate = c->priv->tx_rates[sq->txq_ix];
+ if (tx_rate)
+ mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
+
+ if (params->tx_dim_enabled)
+ sq->state |= BIT(MLX5E_SQ_STATE_AM);
+
+ return 0;
+
+err_free_txqsq:
+ mlx5e_free_txqsq(sq);
+
+ return err;
+}
+
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+{
+ sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix);
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ netdev_tx_reset_queue(sq->txq);
+ netif_tx_start_queue(sq->txq);
+}
+
+void mlx5e_tx_disable_queue(struct netdev_queue *txq)
+{
+ __netif_tx_lock_bh(txq);
+ netif_tx_stop_queue(txq);
+ __netif_tx_unlock_bh(txq);
+}
+
+void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
+
+ mlx5e_tx_disable_queue(sq->txq);
+
+ /* last doorbell out, godspeed .. */
+ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_tx_wqe *nop;
+
+ sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+
+ nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
+ }
+}
+
+void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_core_dev *mdev = sq->mdev;
+ struct mlx5_rate_limit rl = {0};
+
+ cancel_work_sync(&sq->dim.work);
+ cancel_work_sync(&sq->recover_work);
+ mlx5e_destroy_sq(mdev, sq->sqn);
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
+ mlx5e_free_txqsq_descs(sq);
+ mlx5e_free_txqsq(sq);
+}
+
+void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
+ recover_work);
+
+ mlx5e_reporter_tx_err_cqe(sq);
+}
+
+static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
+{
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+ err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
+ if (err)
+ return err;
+
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = params->tx_min_inline_mode;
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
+ if (err)
+ goto err_free_icosq;
+
+ if (param->is_tls) {
+ sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list();
+ if (IS_ERR(sq->ktls_resync)) {
+ err = PTR_ERR(sq->ktls_resync);
+ goto err_destroy_icosq;
+ }
+ }
+ return 0;
+
+err_destroy_icosq:
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
+err_free_icosq:
+ mlx5e_free_icosq(sq);
+
+ return err;
+}
+
+void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
+{
+ set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+}
+
+void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
+{
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+ synchronize_net(); /* Sync with NAPI. */
+}
+
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+
+ if (sq->ktls_resync)
+ mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync);
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
+ mlx5e_free_icosq_descs(sq);
+ mlx5e_free_icosq(sq);
+}
+
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
+ struct mlx5e_xdpsq *sq, bool is_redirect)
+{
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+ err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect);
+ if (err)
+ return err;
+
+ csp.tis_lst_sz = 1;
+ csp.tisn = c->priv->tisn[c->lag_port][0]; /* tc = 0 */
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = sq->min_inline_mode;
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+
+ /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
+ * supported by upstream, and there is no defined trigger to allow
+ * transmitting redirected multi-buffer frames.
+ */
+ if (param->is_xdp_mb && !is_redirect)
+ set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
+
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
+ if (err)
+ goto err_free_xdpsq;
+
+ mlx5e_set_xmit_fp(sq, param->is_mpw);
+
+ if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
+ unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ unsigned int inline_hdr_sz = 0;
+ int i;
+
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ ds_cnt++;
+ }
+
+ /* Pre-initialize fixed WQE fields */
+ for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg;
+
+ sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = 1,
+ .num_pkts = 1,
+ };
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+
+ dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+ dseg->lkey = sq->mkey_be;
+ }
+ }
+
+ return 0;
+
+err_free_xdpsq:
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ mlx5e_free_xdpsq(sq);
+
+ return err;
+}
+
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ synchronize_net(); /* Sync with NAPI. */
+
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
+ mlx5e_free_xdpsq_descs(sq);
+ mlx5e_free_xdpsq(sq);
+}
+
+static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ int err;
+ u32 i;
+
+ err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
+ &cq->wq_ctrl);
+ if (err)
+ return err;
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+ *mcq->set_ci_db = 0;
+ *mcq->arm_db = 0;
+ mcq->vector = param->eq_ix;
+ mcq->comp = mlx5e_completion_event;
+ mcq->event = mlx5e_cq_error_event;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->mdev = mdev;
+ cq->netdev = priv->netdev;
+ cq->priv = priv;
+
+ return 0;
+}
+
+static int mlx5e_alloc_cq(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_create_cq_param *ccp,
+ struct mlx5e_cq *cq)
+{
+ int err;
+
+ param->wq.buf_numa_node = ccp->node;
+ param->wq.db_numa_node = ccp->node;
+ param->eq_ix = ccp->ix;
+
+ err = mlx5e_alloc_cq_common(priv, param, cq);
+
+ cq->napi = ccp->napi;
+ cq->ch_stats = ccp->ch_stats;
+
+ return err;
+}
+
+static void mlx5e_free_cq(struct mlx5e_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
+{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_core_dev *mdev = cq->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+
+ void *in;
+ void *cqc;
+ int inlen;
+ int eqn;
+ int err;
+
+ err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, param->cqc, sizeof(param->cqc));
+
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
+
+ kvfree(in);
+
+ if (err)
+ return err;
+
+ mlx5e_cq_arm(cq);
+
+ return 0;
+}
+
+static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
+{
+ mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+}
+
+int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5e_alloc_cq(priv, param, ccp, cq);
+ if (err)
+ return err;
+
+ err = mlx5e_create_cq(cq, param);
+ if (err)
+ goto err_free_cq;
+
+ if (MLX5_CAP_GEN(mdev, cq_moderation))
+ mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
+ return 0;
+
+err_free_cq:
+ mlx5e_free_cq(cq);
+
+ return err;
+}
+
+void mlx5e_close_cq(struct mlx5e_cq *cq)
+{
+ mlx5e_destroy_cq(cq);
+ mlx5e_free_cq(cq);
+}
+
+static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_create_cq_param *ccp,
+ struct mlx5e_channel_param *cparam)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->txq_sq.cqp,
+ ccp, &c->sq[tc].cq);
+ if (err)
+ goto err_close_tx_cqs;
+ }
+
+ return 0;
+
+err_close_tx_cqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_cq(&c->sq[tc].cq);
+
+ return err;
+}
+
+static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->sq[tc].cq);
+}
+
+static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq)
+{
+ int tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++)
+ if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count)
+ return tc;
+
+ WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq);
+ return -ENOENT;
+}
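+
+/* Worked example (mqprio layout assumed for illustration): with
+ * tc_to_txq = { {.offset = 0, .count = 4}, {.offset = 4, .count = 4} },
+ * txq 5 fails the check for tc 0 (5 - 0 >= 4) but matches tc 1 (5 - 4 < 4),
+ * so 1 is returned.
+ */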
+
+static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix,
+ u32 *hw_id)
+{
+ int tc;
+
+ if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) {
+ *hw_id = 0;
+ return 0;
+ }
+
+ tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix);
+ if (tc < 0)
+ return tc;
+
+ if (tc >= params->mqprio.num_tc) {
+ WARN(1, "Unexpected TCs configuration. tc %d is out of range of %u",
+ tc, params->mqprio.num_tc);
+ return -EINVAL;
+ }
+
+ *hw_id = params->mqprio.channel.hw_id[tc];
+ return 0;
+}
+
+static int mlx5e_open_sqs(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ int err, tc;
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) {
+ int txq_ix = c->ix + tc * params->num_channels;
+ u32 qos_queue_group_id;
+
+ err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id);
+ if (err)
+ goto err_close_sqs;
+
+ err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
+ params, &cparam->txq_sq, &c->sq[tc], tc,
+ qos_queue_group_id,
+ &c->priv->channel_stats[c->ix]->sq[tc]);
+ if (err)
+ goto err_close_sqs;
+ }
+
+ return 0;
+
+err_close_sqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_txqsq(&c->sq[tc]);
+
+ return err;
+}
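+
+/* Indexing sketch (configuration assumed for illustration): with
+ * params->num_channels = 8 and 3 DCB TCs, channel c->ix = 2 opens txqs
+ * 2, 10 and 18, following txq_ix = c->ix + tc * num_channels for tc = 0..2.
+ */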
+
+static void mlx5e_close_sqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_txqsq(&c->sq[tc]);
+}
+
+static int mlx5e_set_sq_maxrate(struct net_device *dev,
+ struct mlx5e_txqsq *sq, u32 rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_modify_sq_param msp = {0};
+ struct mlx5_rate_limit rl = {0};
+ u16 rl_index = 0;
+ int err;
+
+ if (rate == sq->rate_limit)
+ /* nothing to do */
+ return 0;
+
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
+ /* remove the current rl index to free space for the next ones */
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
+
+ sq->rate_limit = 0;
+
+ if (rate) {
+ rl.rate = rate;
+ err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
+ if (err) {
+ netdev_err(dev, "Failed configuring rate %u: %d\n",
+ rate, err);
+ return err;
+ }
+ }
+
+ msp.curr_state = MLX5_SQC_STATE_RDY;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+ msp.rl_index = rl_index;
+ msp.rl_update = true;
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed configuring rate %u: %d\n",
+ rate, err);
+ /* remove the rate from the table */
+ if (rate)
+ mlx5_rl_remove_rate(mdev, &rl);
+ return err;
+ }
+
+ sq->rate_limit = rate;
+ return 0;
+}
+
+static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_txqsq *sq = priv->txq2sq[index];
+ int err = 0;
+
+ if (!mlx5_rl_is_supported(mdev)) {
+ netdev_err(dev, "Rate limiting is not supported on this device\n");
+ return -EINVAL;
+ }
+
+ /* rate is given in Mb/sec, HW config is in Kb/sec */
+ rate = rate << 10;
+
+ /* Check whether the rate is in the valid range; 0 is always valid */
+ if (rate && !mlx5_rl_is_in_range(mdev, rate)) {
+ netdev_err(dev, "TX rate %u, is not in range\n", rate);
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ err = mlx5e_set_sq_maxrate(dev, sq, rate);
+ if (!err)
+ priv->tx_rates[index] = rate;
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_params)
+{
+ int err;
+
+ err = mlx5e_init_rxq_rq(c, params, &c->rq);
+ if (err)
+ return err;
+
+ return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
+}
+
+static int mlx5e_open_queues(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ struct dim_cq_moder icocq_moder = {0, 0};
+ struct mlx5e_create_cq_param ccp;
+ int err;
+
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
+ &c->async_icosq.cq);
+ if (err)
+ return err;
+
+ err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
+ &c->icosq.cq);
+ if (err)
+ goto err_close_async_icosq_cq;
+
+ err = mlx5e_open_tx_cqs(c, params, &ccp, cparam);
+ if (err)
+ goto err_close_icosq_cq;
+
+ err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
+ &c->xdpsq.cq);
+ if (err)
+ goto err_close_tx_cqs;
+
+ err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
+ &c->rq.cq);
+ if (err)
+ goto err_close_xdp_tx_cqs;
+
+ err = c->xdp ? mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp,
+ &ccp, &c->rq_xdpsq.cq) : 0;
+ if (err)
+ goto err_close_rx_cq;
+
+ spin_lock_init(&c->async_icosq_lock);
+
+ err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
+ mlx5e_async_icosq_err_cqe_work);
+ if (err)
+ goto err_close_xdpsq_cq;
+
+ mutex_init(&c->icosq_recovery_lock);
+
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
+ mlx5e_icosq_err_cqe_work);
+ if (err)
+ goto err_close_async_icosq;
+
+ err = mlx5e_open_sqs(c, params, cparam);
+ if (err)
+ goto err_close_icosq;
+
+ err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
+ if (err)
+ goto err_close_sqs;
+
+ if (c->xdp) {
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
+ &c->rq_xdpsq, false);
+ if (err)
+ goto err_close_rq;
+ }
+
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
+ if (err)
+ goto err_close_xdp_sq;
+
+ return 0;
+
+err_close_xdp_sq:
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq_xdpsq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->rq);
+
+err_close_sqs:
+ mlx5e_close_sqs(c);
+
+err_close_icosq:
+ mlx5e_close_icosq(&c->icosq);
+
+err_close_async_icosq:
+ mlx5e_close_icosq(&c->async_icosq);
+
+err_close_xdpsq_cq:
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq_xdpsq.cq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->rq.cq);
+
+err_close_xdp_tx_cqs:
+ mlx5e_close_cq(&c->xdpsq.cq);
+
+err_close_tx_cqs:
+ mlx5e_close_tx_cqs(c);
+
+err_close_icosq_cq:
+ mlx5e_close_cq(&c->icosq.cq);
+
+err_close_async_icosq_cq:
+ mlx5e_close_cq(&c->async_icosq.cq);
+
+ return err;
+}
+
+static void mlx5e_close_queues(struct mlx5e_channel *c)
+{
+ mlx5e_close_xdpsq(&c->xdpsq);
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq_xdpsq);
+ /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
+ cancel_work_sync(&c->icosq.recover_work);
+ mlx5e_close_rq(&c->rq);
+ mlx5e_close_sqs(c);
+ mlx5e_close_icosq(&c->icosq);
+ mutex_destroy(&c->icosq_recovery_lock);
+ mlx5e_close_icosq(&c->async_icosq);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq_xdpsq.cq);
+ mlx5e_close_cq(&c->rq.cq);
+ mlx5e_close_cq(&c->xdpsq.cq);
+ mlx5e_close_tx_cqs(c);
+ mlx5e_close_cq(&c->icosq.cq);
+ mlx5e_close_cq(&c->async_icosq.cq);
+}
+
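+/* Distribute channels across the available LAG ports round-robin. Non-PF
+ * functions bias the mapping by their vhca_id so that different functions do
+ * not all start from the same port.
+ */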
+static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
+{
+ u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id);
+
+ return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
+}
+
+static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)
+{
+ if (ix > priv->stats_nch) {
+ netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix,
+ priv->stats_nch);
+ return -EINVAL;
+ }
+
+ if (priv->channel_stats[ix])
+ return 0;
+
+ /* Asymmetric dynamic memory allocation.
+ * Freed in mlx5e_priv_arrays_free, not on channel closure.
+ */
+ mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix);
+ priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!priv->channel_stats[ix])
+ return -ENOMEM;
+ priv->stats_nch++;
+
+ return 0;
+}
+
+void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
+{
+ spin_lock_bh(&c->async_icosq_lock);
+ mlx5e_trigger_irq(&c->async_icosq);
+ spin_unlock_bh(&c->async_icosq_lock);
+}
+
+void mlx5e_trigger_napi_sched(struct napi_struct *napi)
+{
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
+}
+
+static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam,
+ struct xsk_buff_pool *xsk_pool,
+ struct mlx5e_channel **cp)
+{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ unsigned int irq;
+ int err;
+
+ err = mlx5_vector2irqn(priv->mdev, ix, &irq);
+ if (err)
+ return err;
+
+ err = mlx5e_channel_stats_alloc(priv, ix, cpu);
+ if (err)
+ return err;
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
+ if (!c)
+ return -ENOMEM;
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->ix = ix;
+ c->cpu = cpu;
+ c->pdev = mlx5_core_dma_dev(priv->mdev);
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ c->num_tc = mlx5e_get_dcb_num_tc(params);
+ c->xdp = !!params->xdp_prog;
+ c->stats = &priv->channel_stats[ix]->ch;
+ c->aff_mask = irq_get_effective_affinity_mask(irq);
+ c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
+
+ netif_napi_add(netdev, &c->napi, mlx5e_napi_poll);
+
+ err = mlx5e_open_queues(c, params, cparam);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ if (xsk_pool) {
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
+ err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
+ if (unlikely(err))
+ goto err_close_queues;
+ }
+
+ *cp = c;
+
+ return 0;
+
+err_close_queues:
+ mlx5e_close_queues(c);
+
+err_napi_del:
+ netif_napi_del(&c->napi);
+
+ kvfree(c);
+
+ return err;
+}
+
+static void mlx5e_activate_channel(struct mlx5e_channel *c)
+{
+ int tc;
+
+ napi_enable(&c->napi);
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_activate_txqsq(&c->sq[tc]);
+ mlx5e_activate_icosq(&c->icosq);
+ mlx5e_activate_icosq(&c->async_icosq);
+
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_activate_xsk(c);
+ else
+ mlx5e_activate_rq(&c->rq);
+
+ mlx5e_trigger_napi_icosq(c);
+}
+
+static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
+{
+ int tc;
+
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_deactivate_xsk(c);
+ else
+ mlx5e_deactivate_rq(&c->rq);
+
+ mlx5e_deactivate_icosq(&c->async_icosq);
+ mlx5e_deactivate_icosq(&c->icosq);
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_deactivate_txqsq(&c->sq[tc]);
+ mlx5e_qos_deactivate_queues(c);
+
+ napi_disable(&c->napi);
+}
+
+static void mlx5e_close_channel(struct mlx5e_channel *c)
+{
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_close_xsk(c);
+ mlx5e_close_queues(c);
+ mlx5e_qos_close_queues(c);
+ netif_napi_del(&c->napi);
+
+ kvfree(c);
+}
+
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs)
+{
+ struct mlx5e_channel_param *cparam;
+ int err = -ENOMEM;
+ int i;
+
+ chs->num = chs->params.num_channels;
+
+ chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
+ cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
+ if (!chs->c || !cparam)
+ goto err_free;
+
+ err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
+ if (err)
+ goto err_free;
+
+ for (i = 0; i < chs->num; i++) {
+ struct xsk_buff_pool *xsk_pool = NULL;
+
+ if (chs->params.xdp_prog)
+ xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
+
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]);
+ if (err)
+ goto err_close_channels;
+ }
+
+ if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) {
+ err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
+ if (err)
+ goto err_close_channels;
+ }
+
+ if (priv->htb) {
+ err = mlx5e_qos_open_queues(priv, chs);
+ if (err)
+ goto err_close_ptp;
+ }
+
+ mlx5e_health_channels_update(priv);
+ kvfree(cparam);
+ return 0;
+
+err_close_ptp:
+ if (chs->ptp)
+ mlx5e_ptp_close(chs->ptp);
+
+err_close_channels:
+ for (i--; i >= 0; i--)
+ mlx5e_close_channel(chs->c[i]);
+
+err_free:
+ kfree(chs->c);
+ kvfree(cparam);
+ chs->num = 0;
+ return err;
+}
+
+static void mlx5e_activate_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_activate_channel(chs->c[i]);
+
+ if (chs->ptp)
+ mlx5e_ptp_activate_channel(chs->ptp);
+}
+
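+/* Wait until every non-XSK RQ has posted a minimal number of RX WQEs.
+ * After the first timeout the remaining channels are checked with a zero
+ * timeout, so the total wait time stays bounded.
+ */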
+static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
+ struct mlx5e_channel *c = chs->c[i];
+
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ continue;
+
+ err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout);
+
+ /* Don't wait on the XSK RQ, because the newer xdpsock sample
+ * doesn't provide any Fill Ring entries at the setup stage.
+ */
+ }
+
+ return err ? -ETIMEDOUT : 0;
+}
+
+static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ if (chs->ptp)
+ mlx5e_ptp_deactivate_channel(chs->ptp);
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_deactivate_channel(chs->c[i]);
+}
+
+void mlx5e_close_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ ASSERT_RTNL();
+ if (chs->ptp) {
+ mlx5e_ptp_close(chs->ptp);
+ chs->ptp = NULL;
+ }
+ for (i = 0; i < chs->num; i++)
+ mlx5e_close_channel(chs->c[i]);
+
+ kfree(chs->c);
+ chs->num = 0;
+}
+
+static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rx_res *res = priv->rx_res;
+
+ return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
+}
+
+static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
+
+static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 mtu)
+{
+ u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
+ int err;
+
+ err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
+ if (err)
+ return err;
+
+ /* Update vport context MTU */
+ mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
+ return 0;
+}
+
+static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 *mtu)
+{
+ u16 hw_mtu = 0;
+ int err;
+
+ err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
+ if (err || !hw_mtu) /* fallback to port oper mtu */
+ mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
+
+ *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
+}
+
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 mtu;
+ int err;
+
+ err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
+ if (err)
+ return err;
+
+ mlx5e_query_mtu(mdev, params, &mtu);
+ if (mtu != params->sw_mtu)
+ netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
+ __func__, mtu, params->sw_mtu);
+
+ params->sw_mtu = mtu;
+ return 0;
+}
+
+MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu);
+
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ /* MTU range: 68 - hw-specific max */
+ netdev->min_mtu = ETH_MIN_MTU;
+
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
+ ETH_MAX_MTU);
+}
+
+static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
+ struct netdev_tc_txq *tc_to_txq)
+{
+ int tc, err;
+
+ netdev_reset_tc(netdev);
+
+ if (ntc == 1)
+ return 0;
+
+ err = netdev_set_num_tc(netdev, ntc);
+ if (err) {
+ netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc);
+ return err;
+ }
+
+ for (tc = 0; tc < ntc; tc++) {
+ u16 count, offset;
+
+ count = tc_to_txq[tc].count;
+ offset = tc_to_txq[tc].offset;
+ netdev_set_tc_queue(netdev, tc, count, offset);
+ }
+
+ return 0;
+}
+
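+/* Recompute the real number of TX queues: one per channel per TC, plus the
+ * currently open HTB QoS leaf queues, plus one extra queue per TC when TX
+ * port timestamping (PTP) is enabled.
+ */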
+int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
+{
+ int nch, ntc, num_txqs, err;
+ int qos_queues = 0;
+
+ if (priv->htb)
+ qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb);
+
+ nch = priv->channels.params.num_channels;
+ ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
+ num_txqs = nch * ntc + qos_queues;
+ if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
+ num_txqs += ntc;
+
+ mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs);
+ err = netif_set_real_num_tx_queues(priv->netdev, num_txqs);
+ if (err)
+ netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
+
+ return err;
+}
+
+static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
+{
+ struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq;
+ struct net_device *netdev = priv->netdev;
+ int old_num_txqs, old_ntc;
+ int nch, ntc;
+ int err;
+ int i;
+
+ old_num_txqs = netdev->real_num_tx_queues;
+ old_ntc = netdev->num_tc ? : 1;
+ for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++)
+ old_tc_to_txq[i] = netdev->tc_to_txq[i];
+
+ nch = priv->channels.params.num_channels;
+ ntc = priv->channels.params.mqprio.num_tc;
+ tc_to_txq = priv->channels.params.mqprio.tc_to_txq;
+
+ err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq);
+ if (err)
+ goto err_out;
+ err = mlx5e_update_tx_netdev_queues(priv);
+ if (err)
+ goto err_tcs;
+ err = netif_set_real_num_rx_queues(netdev, nch);
+ if (err) {
+ netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
+ goto err_txqs;
+ }
+
+ return 0;
+
+err_txqs:
+ /* netif_set_real_num_rx_queues could fail only when nch increased. Only
+ * one of nch and ntc is changed in this function. That means, the call
+ * to netif_set_real_num_tx_queues below should not fail, because it
+ * decreases the number of TX queues.
+ */
+ WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
+
+err_tcs:
+ WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc,
+ old_tc_to_txq));
+err_out:
+ return err;
+}
+
+static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues);
+
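+/* Build a default XPS cpumask for each channel: channel ix aggregates the
+ * first CPU of every completion vector ix, ix + num_channels, ... so all
+ * vectors are covered even when fewer channels than vectors are open.
+ */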
+static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
+ struct mlx5e_params *params)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int num_comp_vectors, ix, irq;
+
+ num_comp_vectors = mlx5_comp_vectors_count(mdev);
+
+ for (ix = 0; ix < params->num_channels; ix++) {
+ cpumask_clear(priv->scratchpad.cpumask);
+
+ for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
+
+ cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
+ }
+
+ netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
+ }
+}
+
+static int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
+{
+ u16 count = priv->channels.params.num_channels;
+ int err;
+
+ err = mlx5e_update_netdev_queues(priv);
+ if (err)
+ return err;
+
+ mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
+
+ /* This function may be called on attach, before priv->rx_res is created. */
+ if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
+ mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
+
+ return 0;
+}
+
+MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed);
+
+static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
+{
+ int i, ch, tc, num_tc;
+
+ ch = priv->channels.num;
+ num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
+
+ for (i = 0; i < ch; i++) {
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5e_txqsq *sq = &c->sq[tc];
+
+ priv->txq2sq[sq->txq_ix] = sq;
+ }
+ }
+
+ if (!priv->channels.ptp)
+ goto out;
+
+ if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
+ goto out;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_ptp *c = priv->channels.ptp;
+ struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
+
+ priv->txq2sq[sq->txq_ix] = sq;
+ }
+
+out:
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+}
+
+void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_build_txq_maps(priv);
+ mlx5e_activate_channels(&priv->channels);
+ if (priv->htb)
+ mlx5e_qos_activate_queues(priv);
+ mlx5e_xdp_tx_enable(priv);
+
+ /* dev_watchdog() wants all TX queues to be started when the carrier is
+ * OK, including the ones in range real_num_tx_queues..num_tx_queues-1.
+ * Make it happy to avoid TX timeout false alarms.
+ */
+ netif_tx_start_all_queues(priv->netdev);
+
+ if (mlx5e_is_vport_rep(priv))
+ mlx5e_rep_activate_channels(priv);
+
+ set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state);
+
+ mlx5e_wait_channels_min_rx_wqes(&priv->channels);
+
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
+}
+
+static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv)
+{
+ WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state));
+ if (current_work() != &priv->tx_timeout_work)
+ cancel_work_sync(&priv->tx_timeout_work);
+}
+
+void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
+{
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_deactivate(priv->rx_res);
+
+ clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state);
+ mlx5e_cancel_tx_timeout_work(priv);
+
+ if (mlx5e_is_vport_rep(priv))
+ mlx5e_rep_deactivate_channels(priv);
+
+ /* The results of ndo_select_queue are unreliable, while netdev config
+ * is being changed (real_num_tx_queues, num_tc). Stop all queues to
+ * prevent ndo_start_xmit from being called, so that it can assume that
+ * the selected queue is always valid.
+ */
+ netif_tx_disable(priv->netdev);
+
+ mlx5e_xdp_tx_disable(priv);
+ mlx5e_deactivate_channels(&priv->channels);
+}
+
+static int mlx5e_switch_priv_params(struct mlx5e_priv *priv,
+ struct mlx5e_params *new_params,
+ mlx5e_fp_preactivate preactivate,
+ void *context)
+{
+ struct mlx5e_params old_params;
+
+ old_params = priv->channels.params;
+ priv->channels.params = *new_params;
+
+ if (preactivate) {
+ int err;
+
+ err = preactivate(priv, context);
+ if (err) {
+ priv->channels.params = old_params;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_preactivate preactivate,
+ void *context)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_channels old_chs;
+ int carrier_ok;
+ int err = 0;
+
+ carrier_ok = netif_carrier_ok(netdev);
+ netif_carrier_off(netdev);
+
+ mlx5e_deactivate_priv_channels(priv);
+
+ old_chs = priv->channels;
+ priv->channels = *new_chs;
+
+ /* New channels are ready to roll; call the preactivate hook if needed
+ * to modify HW settings or update kernel parameters.
+ */
+ if (preactivate) {
+ err = preactivate(priv, context);
+ if (err) {
+ priv->channels = old_chs;
+ goto out;
+ }
+ }
+
+ mlx5e_close_channels(&old_chs);
+ priv->profile->update_rx(priv);
+
+ mlx5e_selq_apply(&priv->selq);
+out:
+ mlx5e_activate_priv_channels(priv);
+
+ /* return carrier back if needed */
+ if (carrier_ok)
+ netif_carrier_on(netdev);
+
+ return err;
+}
+
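+/* Switch to a new set of channel parameters. With reset (and the netdev
+ * open), a complete new channel set is opened and swapped in, running the
+ * optional preactivate hook in between; otherwise only the stored parameters
+ * are updated (still running the hook if one is given).
+ */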
+int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ mlx5e_fp_preactivate preactivate,
+ void *context, bool reset)
+{
+ struct mlx5e_channels new_chs = {};
+ int err;
+
+ reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (!reset)
+ return mlx5e_switch_priv_params(priv, params, preactivate, context);
+
+ new_chs.params = *params;
+
+ mlx5e_selq_prepare_params(&priv->selq, &new_chs.params);
+
+ err = mlx5e_open_channels(priv, &new_chs);
+ if (err)
+ goto err_cancel_selq;
+
+ err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
+ if (err)
+ goto err_close;
+
+ return 0;
+
+err_close:
+ mlx5e_close_channels(&new_chs);
+
+err_cancel_selq:
+ mlx5e_selq_cancel(&priv->selq);
+ return err;
+}
+
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
+{
+ return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true);
+}
+
+void mlx5e_timestamp_init(struct mlx5e_priv *priv)
+{
+ priv->tstamp.tx_type = HWTSTAMP_TX_OFF;
+ priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
+}
+
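+/* Set the physical port administrative state. When the device is not in
+ * switchdev (offloads) mode and supports uplink_follow, the uplink vport
+ * admin state is updated to match as well.
+ */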
+static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
+ enum mlx5_port_status state)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int vport_admin_state;
+
+ mlx5_set_port_admin_status(mdev, state);
+
+ if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS ||
+ !MLX5_CAP_GEN(mdev, uplink_follow))
+ return;
+
+ if (state == MLX5_PORT_UP)
+ vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ else
+ vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
+
+ mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
+}
+
+int mlx5e_open_locked(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params);
+
+ set_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ err = mlx5e_open_channels(priv, &priv->channels);
+ if (err)
+ goto err_clear_state_opened_flag;
+
+ priv->profile->update_rx(priv);
+ mlx5e_selq_apply(&priv->selq);
+ mlx5e_activate_priv_channels(priv);
+ mlx5e_apply_traps(priv, true);
+ if (priv->profile->update_carrier)
+ priv->profile->update_carrier(priv);
+
+ mlx5e_queue_update_stats(priv);
+ return 0;
+
+err_clear_state_opened_flag:
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ mlx5e_selq_cancel(&priv->selq);
+ return err;
+}
+
+int mlx5e_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_open_locked(netdev);
+ if (!err)
+ mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_close_locked(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ /* May already be CLOSED if a previous configuration operation
+ * (e.g. an RX/TX queue size change) that involves close & open failed.
+ */
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ mlx5e_apply_traps(priv, false);
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ netif_carrier_off(priv->netdev);
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+
+ return 0;
+}
+
+int mlx5e_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!netif_device_present(netdev))
+ return -ENODEV;
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
+ err = mlx5e_close_locked(netdev);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static void mlx5e_free_drop_rq(struct mlx5e_rq *rq)
+{
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
+static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq *rq,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int err;
+
+ param->wq.db_numa_node = param->wq.buf_numa_node;
+
+ err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ return err;
+
+ /* Mark as unused given "Drop-RQ" packets never reach XDP */
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+
+ rq->mdev = mdev;
+
+ return 0;
+}
+
+static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv,
+ struct mlx5e_cq *cq,
+ struct mlx5e_cq_param *param)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+
+ return mlx5e_alloc_cq_common(priv, param, cq);
+}
+
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_cq_param cq_param = {};
+ struct mlx5e_rq_param rq_param = {};
+ struct mlx5e_cq *cq = &drop_rq->cq;
+ int err;
+
+ mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
+
+ err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
+ if (err)
+ return err;
+
+ err = mlx5e_create_cq(cq, &cq_param);
+ if (err)
+ goto err_free_cq;
+
+ err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
+ if (err)
+ goto err_destroy_cq;
+
+ err = mlx5e_create_rq(drop_rq, &rq_param);
+ if (err)
+ goto err_free_rq;
+
+ err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
+
+ return 0;
+
+err_free_rq:
+ mlx5e_free_drop_rq(drop_rq);
+
+err_destroy_cq:
+ mlx5e_destroy_cq(cq);
+
+err_free_cq:
+ mlx5e_free_cq(cq);
+
+ return err;
+}
+
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
+{
+ mlx5e_destroy_rq(drop_rq);
+ mlx5e_free_drop_rq(drop_rq);
+ mlx5e_destroy_cq(&drop_rq->cq);
+ mlx5e_free_cq(&drop_rq->cq);
+}
+
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
+{
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
+
+ if (MLX5_GET(tisc, tisc, tls_en))
+ MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+ if (mlx5_lag_is_lacp_owner(mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
+
+ return mlx5_core_create_tis(mdev, in, tisn);
+}
+
+void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
+{
+ mlx5_core_destroy_tis(mdev, tisn);
+}
+
+void mlx5e_destroy_tises(struct mlx5e_priv *priv)
+{
+ int tc, i;
+
+ for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++)
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
+}
+
+static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1;
+}
+
+int mlx5e_create_tises(struct mlx5e_priv *priv)
+{
+ int tc, i;
+ int err;
+
+ for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) {
+ for (tc = 0; tc < priv->profile->max_tc; tc++) {
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, prio, tc << 1);
+
+ if (mlx5e_lag_should_assign_affinity(priv->mdev))
+ MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1);
+
+ err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]);
+ if (err)
+ goto err_close_tises;
+ }
+ }
+
+ return 0;
+
+err_close_tises:
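+ /* Destroy the TISes created so far for the current lag port, then all
+ * TISes of the lag ports initialized before it.
+ */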
+ for (; i >= 0; i--) {
+ for (tc--; tc >= 0; tc--)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
+ tc = priv->profile->max_tc;
+ }
+
+ return err;
+}
+
+static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+{
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ priv->mqprio_rl = NULL;
+ }
+ mlx5e_accel_cleanup_tx(priv);
+ mlx5e_destroy_tises(priv);
+}
+
+static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
+ if (err)
+ return err;
+ }
+ if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+ return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
+
+ return 0;
+}
+
+static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
+ int ntc, int nch)
+{
+ int tc;
+
+ memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE);
+
+ /* Map netdev TCs to offset 0.
+ * We have our own UP to TXQ mapping for DCB mode of QoS
+ */
+ for (tc = 0; tc < ntc; tc++) {
+ tc_to_txq[tc] = (struct netdev_tc_txq) {
+ .count = nch,
+ .offset = 0,
+ };
+ }
+}
+
+static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
+ struct tc_mqprio_qopt *qopt)
+{
+ int tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
+ tc_to_txq[tc] = (struct netdev_tc_txq) {
+ .count = qopt->count[tc],
+ .offset = qopt->offset[tc],
+ };
+ }
+}
+
+static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc)
+{
+ params->mqprio.mode = TC_MQPRIO_MODE_DCB;
+ params->mqprio.num_tc = num_tc;
+ mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc,
+ params->num_channels);
+}
+
+static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params,
+ struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
+ u32 hw_id = 0;
+
+ if (rl)
+ mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id);
+ params->mqprio.channel.hw_id[tc] = hw_id;
+ }
+}
+
+static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params,
+ struct tc_mqprio_qopt_offload *mqprio,
+ struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
+
+ params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
+ params->mqprio.num_tc = mqprio->qopt.num_tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++)
+ params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc];
+
+ mlx5e_mqprio_rl_update_params(params, rl);
+ mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt);
+}
+
+static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
+{
+ mlx5e_params_mqprio_dcb_set(params, 1);
+}
+
+static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt *mqprio)
+{
+ struct mlx5e_params new_params;
+ u8 tc = mqprio->num_tc;
+ int err;
+
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ if (tc && tc != MLX5E_MAX_NUM_TC)
+ return -EINVAL;
+
+ new_params = priv->channels.params;
+ mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_num_channels_changed_ctx, NULL, true);
+
+ if (!err && priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ priv->mqprio_rl = NULL;
+ }
+
+ priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
+ mlx5e_get_dcb_num_tc(&priv->channels.params));
+ return err;
+}
+
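+/* Validate an mqprio channel-mode request: it must not conflict with TX port
+ * timestamping, every queue group must be non-empty and contiguous, min_rate
+ * is not supported, max_rate is checked against device limits, and the
+ * groups together must cover exactly the configured number of channels.
+ */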
+static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_ptp *ptp_channel;
+ int agg_count = 0;
+ int i;
+
+ ptp_channel = priv->channels.ptp;
+ if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) {
+ netdev_err(netdev,
+ "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n");
+ return -EINVAL;
+ }
+
+ if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 ||
+ mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC)
+ return -EINVAL;
+
+ for (i = 0; i < mqprio->qopt.num_tc; i++) {
+ if (!mqprio->qopt.count[i]) {
+ netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i);
+ return -EINVAL;
+ }
+ if (mqprio->min_rate[i]) {
+ netdev_err(netdev, "Min tx rate is not supported\n");
+ return -EINVAL;
+ }
+
+ if (mqprio->max_rate[i]) {
+ int err;
+
+ err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]);
+ if (err)
+ return err;
+ }
+
+ if (mqprio->qopt.offset[i] != agg_count) {
+ netdev_err(netdev, "Discontinuous queues config is not supported\n");
+ return -EINVAL;
+ }
+ agg_count += mqprio->qopt.count[i];
+ }
+
+ if (priv->channels.params.num_channels != agg_count) {
+ netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n",
+ agg_count, priv->channels.params.num_channels);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[])
+{
+ int tc;
+
+ for (tc = 0; tc < num_tc; tc++)
+ if (max_rate[tc])
+ return true;
+ return false;
+}
+
+static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev,
+ u8 num_tc, u64 max_rate[])
+{
+ struct mlx5e_mqprio_rl *rl;
+ int err;
+
+ if (!mlx5e_mqprio_rate_limit(num_tc, max_rate))
+ return NULL;
+
+ rl = mlx5e_mqprio_rl_alloc();
+ if (!rl)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate);
+ if (err) {
+ mlx5e_mqprio_rl_free(rl);
+ return ERR_PTR(err);
+ }
+
+ return rl;
+}
+
+static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
+{
+ mlx5e_fp_preactivate preactivate;
+ struct mlx5e_params new_params;
+ struct mlx5e_mqprio_rl *rl;
+ bool nch_changed;
+ int err;
+
+ err = mlx5e_mqprio_channel_validate(priv, mqprio);
+ if (err)
+ return err;
+
+ rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate);
+ if (IS_ERR(rl))
+ return PTR_ERR(rl);
+
+ new_params = priv->channels.params;
+ mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl);
+
+ nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1;
+ preactivate = nch_changed ? mlx5e_num_channels_changed_ctx :
+ mlx5e_update_netdev_queues_ctx;
+ err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true);
+ if (err) {
+ if (rl) {
+ mlx5e_mqprio_rl_cleanup(rl);
+ mlx5e_mqprio_rl_free(rl);
+ }
+ return err;
+ }
+
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ }
+ priv->mqprio_rl = rl;
+
+ return 0;
+}
+
+static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
+ struct tc_mqprio_qopt_offload *mqprio)
+{
+ /* MQPRIO is another toplevel qdisc that can't be attached
+ * simultaneously with the offloaded HTB.
+ */
+ if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq)))
+ return -EINVAL;
+
+ switch (mqprio->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt);
+ case TC_MQPRIO_MODE_CHANNEL:
+ return mlx5e_setup_tc_mqprio_channel(priv, mqprio);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ bool tc_unbind = false;
+ int err;
+
+ if (type == TC_SETUP_BLOCK &&
+ ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
+ tc_unbind = true;
+
+ if (!netif_device_present(dev) && !tc_unbind)
+ return -ENODEV;
+
+ switch (type) {
+ case TC_SETUP_BLOCK: {
+ struct flow_block_offload *f = type_data;
+
+ f->unlocked_driver_cb = true;
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_block_cb_list,
+ mlx5e_setup_tc_block_cb,
+ priv, priv, true);
+ }
+ case TC_SETUP_QDISC_MQPRIO:
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_setup_tc_mqprio(priv, type_data);
+ mutex_unlock(&priv->state_lock);
+ return err;
+ case TC_SETUP_QDISC_HTB:
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_htb_setup_tc(priv, type_data);
+ mutex_unlock(&priv->state_lock);
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
+{
+ int i;
+
+ for (i = 0; i < priv->stats_nch; i++) {
+ struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
+ struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
+ struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
+ int j;
+
+ s->rx_packets += rq_stats->packets + xskrq_stats->packets;
+ s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes;
+ s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets;
+
+ for (j = 0; j < priv->max_opened_tc; j++) {
+ struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_dropped += sq_stats->dropped;
+ }
+ }
+ if (priv->tx_ptp_opened) {
+ for (i = 0; i < priv->max_opened_tc; i++) {
+ struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_dropped += sq_stats->dropped;
+ }
+ }
+ if (priv->rx_ptp_opened) {
+ struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
+
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+ s->multicast += rq_stats->mcast_packets;
+ }
+}
+
+void
+mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+
+ if (!netif_device_present(dev))
+ return;
+
+ /* In switchdev mode, the monitor counters don't cover the
+ * 802_3 rx/tx stats. The update-stats mechanism should keep
+ * the 802_3 layout counters updated.
+ */
+ if (!mlx5e_monitor_counter_supported(priv) ||
+ mlx5e_is_uplink_rep(priv)) {
+ /* update HW stats in background for next time */
+ mlx5e_queue_update_stats(priv);
+ }
+
+ if (mlx5e_is_uplink_rep(priv)) {
+ struct mlx5e_vport_stats *vstats = &priv->stats.vport;
+
+ stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+ stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok);
+ stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+ stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+
+ /* vport multicast also counts packets that are dropped due to steering
+ * or rx out of buffer
+ */
+ stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
+ } else {
+ mlx5e_fold_sw_stats64(priv, stats);
+ }
+
+ stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
+
+ stats->rx_length_errors =
+ PPORT_802_3_GET(pstats, a_in_range_length_errors) +
+ PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
+ PPORT_802_3_GET(pstats, a_frame_too_long_errors) +
+ VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small);
+ stats->rx_crc_errors =
+ PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
+ stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
+ stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
+ stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
+ stats->rx_frame_errors;
+ stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
+}
+
+static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
+{
+ if (mlx5e_is_uplink_rep(priv))
+ return; /* no rx mode for uplink rep */
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+}
+
+static void mlx5e_set_rx_mode(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_nic_set_rx_mode(priv);
+}
+
+static int mlx5e_set_mac(struct net_device *netdev, void *addr)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct sockaddr *saddr = addr;
+
+ if (!is_valid_ether_addr(saddr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ netif_addr_lock_bh(netdev);
+ eth_hw_addr_set(netdev, saddr->sa_data);
+ netif_addr_unlock_bh(netdev);
+
+ mlx5e_nic_set_rx_mode(priv);
+
+ return 0;
+}
+
+#define MLX5E_SET_FEATURE(features, feature, enable) \
+ do { \
+ if (enable) \
+ *features |= feature; \
+ else \
+ *features &= ~feature; \
+ } while (0)
+
+typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
+
+static int set_feature_lro(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params *cur_params;
+ struct mlx5e_params new_params;
+ bool reset = true;
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ cur_params = &priv->channels.params;
+ new_params = *cur_params;
+
+ if (enable)
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
+ else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ else
+ goto out;
+
+ if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
+ new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
+ if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
+ reset = false;
+ }
+ }
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int set_feature_hw_gro(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params new_params;
+ bool reset = true;
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+ new_params = priv->channels.params;
+
+ if (enable) {
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
+ new_params.packet_merge.shampo.match_criteria_type =
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
+ new_params.packet_merge.shampo.alignment_granularity =
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE;
+ } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ } else {
+ goto out;
+ }
+
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (enable)
+ mlx5e_enable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
+ else
+ mlx5e_disable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
+
+ return 0;
+}
+
+static int set_feature_hw_tc(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+ int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
+ MLX5_TC_FLAG(NIC_OFFLOAD);
+ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
+ netdev_err(netdev,
+ "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+ return -EINVAL;
+ }
+#endif
+
+ mutex_lock(&priv->state_lock);
+ if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n");
+ err = -EINVAL;
+ }
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int set_feature_rx_all(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_set_port_fcs(mdev, !enable);
+}
+
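+/* Toggle the per-port rx_ts_over_crc setting (RX timestamp carried over the
+ * CRC field). This is a no-op if the ports_check capability or
+ * rx_ts_over_crc_cap is missing, or if the requested state already matches.
+ */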
+static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {};
+ bool supported, curr_state;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, ports_check))
+ return 0;
+
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap);
+ curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc);
+
+ if (!supported || enable == curr_state)
+ return 0;
+
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable);
+
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ bool enable = *(bool *)ctx;
+
+ return mlx5e_set_rx_port_ts(mdev, enable);
+}
+
+static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels *chs = &priv->channels;
+ struct mlx5e_params new_params;
+ int err;
+ bool rx_ts_over_crc = !enable;
+
+ mutex_lock(&priv->state_lock);
+
+ new_params = chs->params;
+ new_params.scatter_fcs_en = enable;
+ err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap,
+ &rx_ts_over_crc, true);
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable);
+ priv->channels.params.vlan_strip_disable = !enable;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
+ if (err) {
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, enable);
+ priv->channels.params.vlan_strip_disable = enable;
+ }
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_flow_steering *fs = priv->fs;
+
+ if (mlx5e_is_uplink_rep(priv))
+ return 0; /* no vlan table for uplink rep */
+
+ return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid);
+}
+
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_flow_steering *fs = priv->fs;
+
+ if (mlx5e_is_uplink_rep(priv))
+ return 0; /* no vlan table for uplink rep */
+
+ return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid);
+}
+
+#ifdef CONFIG_MLX5_EN_ARFS
+static int set_feature_arfs(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (enable)
+ err = mlx5e_arfs_enable(priv->fs);
+ else
+ err = mlx5e_arfs_disable(priv->fs);
+
+ return err;
+}
+#endif
+
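+/* Run a feature handler only when the corresponding bit actually changes.
+ * On failure the bit is flipped back in the requested feature set so the
+ * netdev feature state stays consistent with the device.
+ */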
+static int mlx5e_handle_feature(struct net_device *netdev,
+ netdev_features_t *features,
+ netdev_features_t feature,
+ mlx5e_feature_handler feature_handler)
+{
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
+ int err;
+
+ if (!(changes & feature))
+ return 0;
+
+ err = feature_handler(netdev, enable);
+ if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
+ netdev_err(netdev, "%s feature %pNF failed, err %d\n",
+ enable ? "Enable" : "Disable", &feature, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
+{
+ netdev_features_t oper_features = features;
+ int err = 0;
+
+#define MLX5E_HANDLE_FEATURE(feature, handler) \
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
+
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
+ set_feature_cvlan_filter);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
+#ifdef CONFIG_MLX5_EN_ARFS
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
+#endif
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx);
+
+ if (err) {
+ netdev->features = oper_features;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ features &= ~NETIF_F_HW_TLS_RX;
+ if (netdev->features & NETIF_F_HW_TLS_RX)
+ netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_HW_TLS_TX;
+ if (netdev->features & NETIF_F_HW_TLS_TX)
+ netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_NTUPLE;
+ if (netdev->features & NETIF_F_NTUPLE)
+ netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_GRO_HW;
+ if (netdev->features & NETIF_F_GRO_HW)
+ netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n");
+
+ return features;
+}
+
+static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_vlan_table *vlan;
+ struct mlx5e_params *params;
+
+ if (!netif_device_present(netdev))
+ return features;
+
+ vlan = mlx5e_fs_get_vlan(priv->fs);
+ mutex_lock(&priv->state_lock);
+ params = &priv->channels.params;
+ if (!vlan ||
+ !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) {
+ /* HW strips the outer C-tag header, which is a problem
+ * for S-tag traffic.
+ */
+ features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ if (!params->vlan_strip_disable)
+ netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
+ }
+
+ if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (params->xdp_prog) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with XDP\n");
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (priv->xsk.refcnt) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n",
+ priv->xsk.refcnt);
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+ priv->xsk.refcnt);
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ features &= ~NETIF_F_RXHASH;
+ if (netdev->features & NETIF_F_RXHASH)
+ netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (mlx5e_is_uplink_rep(priv)) {
+ features = mlx5e_fix_uplink_rep_features(netdev, features);
+ features |= NETIF_F_NETNS_LOCAL;
+ } else {
+ features &= ~NETIF_F_NETNS_LOCAL;
+ }
+
+ mutex_unlock(&priv->state_lock);
+
+ return features;
+}
+
+static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
+ struct mlx5e_channels *chs,
+ struct mlx5e_params *new_params,
+ struct mlx5_core_dev *mdev)
+{
+ u16 ix;
+
+ for (ix = 0; ix < chs->params.num_channels; ix++) {
+ struct xsk_buff_pool *xsk_pool =
+ mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
+ struct mlx5e_xsk_param xsk;
+ int max_xdp_mtu;
+
+ if (!xsk_pool)
+ continue;
+
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
+ max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk);
+
+ /* Validate XSK params and XDP MTU in advance */
+ if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) ||
+ new_params->sw_mtu > max_xdp_mtu) {
+ u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
+ int max_mtu_frame, max_mtu_page, max_mtu;
+
+ /* Two criteria must be met:
+ * 1. HW MTU + all headrooms <= XSK frame size.
+ * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
+ */
+ max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
+ max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0));
+ max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu);
+
+ netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n",
+ new_params->sw_mtu, ix, max_mtu);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool mlx5e_params_validate_xdp(struct net_device *netdev,
+ struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ bool is_linear;
+
+ /* No XSK params: AF_XDP can't be enabled yet at the point of setting
+ * the XDP program.
+ */
+ is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
+
+ if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
+ netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+ if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
+ netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
+ params->sw_mtu,
+ mlx5e_xdp_max_mtu(params, NULL));
+ return false;
+ }
+
+ return true;
+}
+
+int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+ mlx5e_fp_preactivate preactivate)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params new_params;
+ struct mlx5e_params *params;
+ bool reset = true;
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ params = &priv->channels.params;
+
+ new_params = *params;
+ new_params.sw_mtu = new_mtu;
+ err = mlx5e_validate_params(priv->mdev, &new_params);
+ if (err)
+ goto out;
+
+ if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev,
+ &new_params)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (priv->xsk.refcnt &&
+ !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
+ &new_params, priv->mdev)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+ reset = false;
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) {
+ bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
+ bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
+ &new_params, NULL);
+ u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL);
+ u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL);
+
+ /* Always reset in linear mode - hw_mtu is used in data path.
+ * Check that the mode was non-linear and didn't change.
+ * If XSK is active, XSK RQs are linear.
+ * Reset if the RQ size changed, even if it's non-linear.
+ */
+ if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
+ sz_old == sz_new)
+ reset = false;
+ }
+
+ err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset);
+
+out:
+ netdev->mtu = params->sw_mtu;
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
+}
+
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
+{
+ bool set = *(bool *)ctx;
+
+ return mlx5e_ptp_rx_manage_fs(priv, set);
+}
+
+static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter)
+{
+ bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
+ int err;
+
+ if (!rx_filter)
+ /* Reset CQE compression to Admin default */
+ return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false);
+
+ if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ return 0;
+
+ /* Disable CQE compression */
+ netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true);
+ if (err)
+ netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+
+ return err;
+}
+
+static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx)
+{
+ struct mlx5e_params new_params;
+
+ if (ptp_rx == priv->channels.params.ptp_rx)
+ return 0;
+
+ new_params = priv->channels.params;
+ new_params.ptp_rx = ptp_rx;
+ return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+ &new_params.ptp_rx, true);
+}
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ bool rx_cqe_compress_def;
+ bool ptp_rx;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
+ (mlx5_clock_get_ptp_index(priv->mdev) == -1))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* TX HW timestamp */
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
+
+ /* RX HW timestamp */
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ ptp_rx = false;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+		/* Enable ptp_rx only when HW timestamping is requested
+		 * and CQE compression (admin default) is on
+		 */
+ ptp_rx = rx_cqe_compress_def;
+ break;
+ default:
+ err = -ERANGE;
+ goto err_unlock;
+ }
+
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
+ err = mlx5e_hwstamp_config_no_ptp_rx(priv,
+ config.rx_filter != HWTSTAMP_FILTER_NONE);
+ else
+ err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx);
+ if (err)
+ goto err_unlock;
+
+ memcpy(&priv->tstamp, &config, sizeof(config));
+ mutex_unlock(&priv->state_lock);
+
+ /* might need to fix some features */
+ netdev_update_features(priv->netdev);
+
+ return copy_to_user(ifr->ifr_data, &config,
+ sizeof(config)) ? -EFAULT : 0;
+err_unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config *cfg = &priv->tstamp;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
+}
+
+static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlx5e_hwstamp_set(priv, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx5e_hwstamp_get(priv, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
+}
+
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+ __be16 vlan_proto)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (vlan_proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
+ vlan, qos);
+}
+
+static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
+}
+
+static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
+}
+
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
+ max_tx_rate, min_tx_rate);
+}
+
+static int mlx5_vport_link2ifla(u8 esw_link)
+{
+ switch (esw_link) {
+ case MLX5_VPORT_ADMIN_STATE_DOWN:
+ return IFLA_VF_LINK_STATE_DISABLE;
+ case MLX5_VPORT_ADMIN_STATE_UP:
+ return IFLA_VF_LINK_STATE_ENABLE;
+ }
+ return IFLA_VF_LINK_STATE_AUTO;
+}
+
+static int mlx5_ifla_link2vport(u8 ifla_link)
+{
+ switch (ifla_link) {
+ case IFLA_VF_LINK_STATE_DISABLE:
+ return MLX5_VPORT_ADMIN_STATE_DOWN;
+ case IFLA_VF_LINK_STATE_ENABLE:
+ return MLX5_VPORT_ADMIN_STATE_UP;
+ }
+ return MLX5_VPORT_ADMIN_STATE_AUTO;
+}
+
+static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
+ int link_state)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_is_uplink_rep(priv))
+ return -EOPNOTSUPP;
+
+ return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
+ mlx5_ifla_link2vport(link_state));
+}
+
+int mlx5e_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!netif_device_present(dev))
+ return -EOPNOTSUPP;
+
+ err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
+ if (err)
+ return err;
+ ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
+ return 0;
+}
+
+int mlx5e_get_vf_stats(struct net_device *dev,
+ int vf, struct ifla_vf_stats *vf_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
+ vf_stats);
+}
+
+static bool
+mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (!netif_device_present(dev))
+ return false;
+
+ if (!mlx5e_is_uplink_rep(priv))
+ return false;
+
+ return mlx5e_rep_has_offload_stats(dev, attr_id);
+}
+
+static int
+mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (!mlx5e_is_uplink_rep(priv))
+ return -EOPNOTSUPP;
+
+ return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
+}
+#endif
+
+static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
+{
+ switch (proto_type) {
+ case IPPROTO_GRE:
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx));
+ default:
+ return false;
+ }
+}
+
+static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
+ struct sk_buff *skb)
+{
+ switch (skb->inner_protocol) {
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_TEB):
+ return true;
+ case htons(ETH_P_MPLS_UC):
+ case htons(ETH_P_MPLS_MC):
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
+ }
+ return false;
+}
+
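+/* Per-skb decision whether checksum/GSO offload may stay enabled for an
+ * encapsulated packet: parse the outer protocol, keep the features only when
+ * the tunnel type (GRE, IP-in-IP, a known VXLAN/GENEVE UDP port, or ESP) is
+ * offloadable by the device, and otherwise strip NETIF_F_CSUM_MASK and
+ * NETIF_F_GSO_MASK.
+ */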
+static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ unsigned int offset = 0;
+ struct udphdr *udph;
+ u8 proto;
+ u16 port;
+
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IP):
+ proto = ip_hdr(skb)->protocol;
+ break;
+ case htons(ETH_P_IPV6):
+ proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
+ break;
+ default:
+ goto out;
+ }
+
+ switch (proto) {
+ case IPPROTO_GRE:
+ if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
+ return features;
+ break;
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP))
+ return features;
+ break;
+ case IPPROTO_UDP:
+ udph = udp_hdr(skb);
+ port = be16_to_cpu(udph->dest);
+
+		/* Check whether the UDP dport is offloaded by the HW */
+ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
+ return features;
+
+#if IS_ENABLED(CONFIG_GENEVE)
+ /* Support Geneve offload for default UDP port */
+ if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
+ return features;
+#endif
+ break;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ case IPPROTO_ESP:
+ return mlx5e_ipsec_feature_check(skb, features);
+#endif
+ }
+
+out:
+	/* Disable CSUM and GSO for encapsulated packets the HW cannot offload */
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ features = vlan_features_check(skb, features);
+ features = vxlan_features_check(skb, features);
+
+	/* Check whether the tunneled packet is offloaded by the HW */
+ if (skb->encapsulation &&
+ (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
+ return mlx5e_tunnel_features_check(priv, skb, features);
+
+ return features;
+}
+
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ tx_timeout_work);
+ struct net_device *netdev = priv->netdev;
+ int i;
+
+ /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues
+ * through this flow. However, channel closing flows have to wait for
+ * this work to finish while holding rtnl lock too. So either get the
+	 * lock or find that the channels are being closed for another reason,
+	 * in which case this work is no longer relevant.
+ */
+ while (!rtnl_trylock()) {
+ if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
+ return;
+ msleep(20);
+ }
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ for (i = 0; i < netdev->real_num_tx_queues; i++) {
+ struct netdev_queue *dev_queue =
+ netdev_get_tx_queue(netdev, i);
+ struct mlx5e_txqsq *sq = priv->txq2sq[i];
+
+ if (!netif_xmit_stopped(dev_queue))
+ continue;
+
+ if (mlx5e_reporter_tx_timeout(sq))
+			/* break if we attempted to reopen the channels */
+ break;
+ }
+
+unlock:
+ rtnl_unlock();
+}
+
+static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ netdev_err(dev, "TX timeout detected\n");
+ queue_work(priv->wq, &priv->tx_timeout_work);
+}
+
+static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_params new_params;
+
+ if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
+ return -EINVAL;
+ }
+
+ new_params = priv->channels.params;
+ new_params.xdp_prog = prog;
+
+ if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
+{
+ struct bpf_prog *old_prog;
+
+ old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
+ lockdep_is_held(&rq->priv->state_lock));
+ if (old_prog)
+ bpf_prog_put(old_prog);
+}
+
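+/* Install or remove an XDP program. A full channel reset is needed only when
+ * XDP is turned on or off; swapping one program for another keeps the open
+ * channels and just replaces the RQs' program pointers under RCU, adjusting
+ * the program refcounts accordingly.
+ */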
+static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params new_params;
+ struct bpf_prog *old_prog;
+ int err = 0;
+ bool reset;
+ int i;
+
+ mutex_lock(&priv->state_lock);
+
+ if (prog) {
+ err = mlx5e_xdp_allowed(priv, prog);
+ if (err)
+ goto unlock;
+ }
+
+ /* no need for full reset when exchanging programs */
+ reset = (!priv->channels.params.xdp_prog || !prog);
+
+ new_params = priv->channels.params;
+ new_params.xdp_prog = prog;
+
+ /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
+ * supported with the new parameters: if PAGE_SIZE is bigger than
+ * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
+ * the MTU is small enough for the linear mode, because XDP uses strides
+ * of PAGE_SIZE on regular RQs.
+ */
+ if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
+ err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
+ if (err)
+ goto unlock;
+ }
+
+ old_prog = priv->channels.params.xdp_prog;
+
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
+ if (err)
+ goto unlock;
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+ goto unlock;
+
+	/* When exchanging programs without a reset, update the refcounts
+	 * on behalf of the channels' RQs here.
+	 */
+ bpf_prog_add(prog, priv->channels.num);
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ mlx5e_rq_replace_xdp_prog(&c->rq, prog);
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) {
+ bpf_prog_inc(prog);
+ mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
+ }
+ }
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ /* Need to fix some features. */
+ if (!err)
+ netdev_update_features(netdev);
+
+ return err;
+}
+
+static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return mlx5e_xdp_set(dev, xdp->prog);
+ case XDP_SETUP_XSK_POOL:
+ return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool,
+ xdp->xsk.queue_id);
+ default:
+ return -EINVAL;
+ }
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask,
+ int nlflags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 mode, setting;
+ int err;
+
+ err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
+ if (err)
+ return err;
+ mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
+ mode,
+ 0, 0, nlflags, filter_mask, NULL);
+}
+
+static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+ u16 flags, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct nlattr *attr, *br_spec;
+ u16 mode = BRIDGE_MODE_UNDEF;
+ u8 setting;
+ int rem;
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (!br_spec)
+ return -EINVAL;
+
+ nla_for_each_nested(attr, br_spec, rem) {
+ if (nla_type(attr) != IFLA_BRIDGE_MODE)
+ continue;
+
+ if (nla_len(attr) < sizeof(mode))
+ return -EINVAL;
+
+ mode = nla_get_u16(attr);
+ if (mode > BRIDGE_MODE_VEPA)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (mode == BRIDGE_MODE_UNDEF)
+ return -EINVAL;
+
+ setting = (mode == BRIDGE_MODE_VEPA) ? 1 : 0;
+ return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
+}
+#endif
+
+const struct net_device_ops mlx5e_netdev_ops = {
+ .ndo_open = mlx5e_open,
+ .ndo_stop = mlx5e_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_setup_tc = mlx5e_setup_tc,
+ .ndo_select_queue = mlx5e_select_queue,
+ .ndo_get_stats64 = mlx5e_get_stats,
+ .ndo_set_rx_mode = mlx5e_set_rx_mode,
+ .ndo_set_mac_address = mlx5e_set_mac,
+ .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid,
+ .ndo_set_features = mlx5e_set_features,
+ .ndo_fix_features = mlx5e_fix_features,
+ .ndo_change_mtu = mlx5e_change_nic_mtu,
+ .ndo_eth_ioctl = mlx5e_ioctl,
+ .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
+ .ndo_features_check = mlx5e_features_check,
+ .ndo_tx_timeout = mlx5e_tx_timeout,
+ .ndo_bpf = mlx5e_xdp,
+ .ndo_xdp_xmit = mlx5e_xdp_xmit,
+ .ndo_xsk_wakeup = mlx5e_xsk_wakeup,
+#ifdef CONFIG_MLX5_EN_ARFS
+ .ndo_rx_flow_steer = mlx5e_rx_flow_steer,
+#endif
+#ifdef CONFIG_MLX5_ESWITCH
+ .ndo_bridge_setlink = mlx5e_bridge_setlink,
+ .ndo_bridge_getlink = mlx5e_bridge_getlink,
+
+ /* SRIOV E-Switch NDOs */
+ .ndo_set_vf_mac = mlx5e_set_vf_mac,
+ .ndo_set_vf_vlan = mlx5e_set_vf_vlan,
+ .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk,
+ .ndo_set_vf_trust = mlx5e_set_vf_trust,
+ .ndo_set_vf_rate = mlx5e_set_vf_rate,
+ .ndo_get_vf_config = mlx5e_get_vf_config,
+ .ndo_set_vf_link_state = mlx5e_set_vf_link_state,
+ .ndo_get_vf_stats = mlx5e_get_vf_stats,
+ .ndo_has_offload_stats = mlx5e_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_get_offload_stats,
+#endif
+ .ndo_get_devlink_port = mlx5e_get_devlink_port,
+};
+
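+/* Pick the smallest firmware-supported LRO timer period that is at least the
+ * requested timeout; if none is large enough, the largest supported period
+ * is returned.
+ */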
+static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
+{
+ int i;
+
+ /* The supported periods are organized in ascending order */
+ for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
+ if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
+ break;
+
+ return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
+}
+
+void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 rx_cq_period_mode;
+
+ params->sw_mtu = mtu;
+ params->hard_mtu = MLX5E_ETH_HARD_MTU;
+ params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
+ priv->max_nch);
+ mlx5e_params_mqprio_reset(params);
+
+ /* SQ */
+ params->log_sq_size = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
+
+ /* XDP SQ */
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
+
+ /* set CQE compression */
+ params->rx_cqe_compress_def = false;
+ if (MLX5_CAP_GEN(mdev, cqe_compression) &&
+ MLX5_CAP_GEN(mdev, vport_group_manager))
+ params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
+
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
+
+ /* RQ */
+ mlx5e_build_rq_params(mdev, params);
+
+ params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+
+ /* CQ moderation params */
+ rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
+ mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+
+ /* TX inline */
+ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+
+ /* AF_XDP */
+ params->xsk = xsk;
+
+	/* Do not update netdev->features directly here;
+	 * mlx5e_update_features() is called from mlx5e_attach_netdev().
+	 * To change netdev->features, modify mlx5e_fix_features() instead.
+	 */
+}
+
+static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ u8 addr[ETH_ALEN];
+
+ mlx5_query_mac_address(priv->mdev, addr);
+ if (is_zero_ether_addr(addr) &&
+ !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
+ eth_hw_addr_random(netdev);
+ mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
+ return;
+ }
+
+ eth_hw_addr_set(netdev, addr);
+}
+
+static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
+{
+ if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+ return;
+
+ priv->nic_info.set_port = mlx5e_vxlan_set_port;
+ priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
+ priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
+ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
+ priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
+ /* Don't count the space hard-coded to the IANA port */
+ priv->nic_info.tables[0].n_entries =
+ mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
+
+ priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
+}
+
+static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
+ return true;
+ }
+ return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
+}
+
+static void mlx5e_build_nic_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ bool fcs_supported;
+ bool fcs_enabled;
+
+ SET_NETDEV_DEV(netdev, mdev->device);
+
+ netdev->netdev_ops = &mlx5e_netdev_ops;
+
+ mlx5e_dcbnl_build_netdev(netdev);
+
+ netdev->watchdog_timeo = 15 * HZ;
+
+ netdev->ethtool_ops = &mlx5e_ethtool_ops;
+
+ netdev->vlan_features |= NETIF_F_SG;
+ netdev->vlan_features |= NETIF_F_HW_CSUM;
+ netdev->vlan_features |= NETIF_F_GRO;
+ netdev->vlan_features |= NETIF_F_TSO;
+ netdev->vlan_features |= NETIF_F_TSO6;
+ netdev->vlan_features |= NETIF_F_RXCSUM;
+ netdev->vlan_features |= NETIF_F_RXHASH;
+ netdev->vlan_features |= NETIF_F_GSO_PARTIAL;
+
+ netdev->mpls_features |= NETIF_F_SG;
+ netdev->mpls_features |= NETIF_F_HW_CSUM;
+ netdev->mpls_features |= NETIF_F_TSO;
+ netdev->mpls_features |= NETIF_F_TSO6;
+
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
+
+ /* Tunneled LRO is not supported in the driver, and the same RQs are
+ * shared between inner and outer TIRs, so the driver can't disable LRO
+ * for inner TIRs while having it enabled for outer TIRs. Due to this,
+ * block LRO altogether if the firmware declares tunneled LRO support.
+ */
+ if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
+ !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
+ !MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
+ mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ netdev->vlan_features |= NETIF_F_LRO;
+
+ netdev->hw_features = netdev->vlan_features;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+
+ if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
+ netdev->hw_enc_features |= NETIF_F_HW_CSUM;
+ netdev->hw_enc_features |= NETIF_F_TSO;
+ netdev->hw_enc_features |= NETIF_F_TSO6;
+ netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
+ }
+
+ if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
+ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ }
+
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
+ netdev->hw_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->gso_partial_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ }
+
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
+ netdev->hw_features |= NETIF_F_GSO_IPXIP4 |
+ NETIF_F_GSO_IPXIP6;
+ netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 |
+ NETIF_F_GSO_IPXIP6;
+ netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 |
+ NETIF_F_GSO_IPXIP6;
+ }
+
+ netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
+ netdev->hw_features |= NETIF_F_GSO_UDP_L4;
+ netdev->features |= NETIF_F_GSO_UDP_L4;
+
+ mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
+
+ if (fcs_supported)
+ netdev->hw_features |= NETIF_F_RXALL;
+
+ if (MLX5_CAP_ETH(mdev, scatter_fcs))
+ netdev->hw_features |= NETIF_F_RXFCS;
+
+ if (mlx5_qos_is_supported(mdev))
+ netdev->hw_features |= NETIF_F_HW_TC;
+
+ netdev->features = netdev->hw_features;
+
+ /* Defaults */
+ if (fcs_enabled)
+ netdev->features &= ~NETIF_F_RXALL;
+ netdev->features &= ~NETIF_F_LRO;
+ netdev->features &= ~NETIF_F_GRO_HW;
+ netdev->features &= ~NETIF_F_RXFCS;
+
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+ if (FT_CAP(flow_modify_en) &&
+ FT_CAP(modify_root) &&
+ FT_CAP(identified_miss_table_mode) &&
+ FT_CAP(flow_table_modify)) {
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+ netdev->hw_features |= NETIF_F_HW_TC;
+#endif
+#ifdef CONFIG_MLX5_EN_ARFS
+ netdev->hw_features |= NETIF_F_NTUPLE;
+#endif
+ }
+
+ netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
+ mlx5e_set_netdev_dev_addr(netdev);
+ mlx5e_macsec_build_netdev(priv);
+ mlx5e_ipsec_build_netdev(priv);
+ mlx5e_ktls_build_netdev(priv);
+}
+
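+/* Allocate two queue counters: one attached to the regular RQs and one for
+ * the drop RQ, so their out-of-buffer drops can be reported separately.
+ */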
+void mlx5e_create_q_counters(struct mlx5e_priv *priv)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
+ if (!err)
+ priv->q_counter =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+
+ err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
+ if (!err)
+ priv->drop_rq_q_counter =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+}
+
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ if (priv->q_counter) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ priv->q_counter);
+ mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
+ }
+
+ if (priv->drop_rq_q_counter) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ priv->drop_rq_q_counter);
+ mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
+ }
+}
+
+static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_flow_steering *fs;
+ int err;
+
+ mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
+ mlx5e_vxlan_set_netdev_info(priv);
+
+ mlx5e_timestamp_init(priv);
+
+ fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!fs) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "FS initialization failed, %d\n", err);
+ return err;
+ }
+ priv->fs = fs;
+
+ err = mlx5e_ipsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
+
+ err = mlx5e_ktls_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
+
+ mlx5e_health_create_reporters(priv);
+ return 0;
+}
+
+static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_health_destroy_reporters(priv);
+ mlx5e_ktls_cleanup(priv);
+ mlx5e_ipsec_cleanup(priv);
+ mlx5e_fs_cleanup(priv->fs);
+ priv->fs = NULL;
+}
+
+static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ enum mlx5e_rx_res_features features;
+ int err;
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res)
+ return -ENOMEM;
+
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
+ features = MLX5E_RX_RES_FEATURE_PTP;
+ if (mlx5_tunnel_inner_ft_supported(mdev))
+ features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
+ if (err)
+ goto err_close_drop_rq;
+
+ err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile,
+ priv->netdev);
+ if (err) {
+ mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
+ goto err_destroy_rx_res;
+ }
+
+ err = mlx5e_tc_nic_init(priv);
+ if (err)
+ goto err_destroy_flow_steering;
+
+ err = mlx5e_accel_init_rx(priv);
+ if (err)
+ goto err_tc_nic_cleanup;
+
+#ifdef CONFIG_MLX5_EN_ARFS
+ priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev);
+#endif
+
+ return 0;
+
+err_tc_nic_cleanup:
+ mlx5e_tc_nic_cleanup(priv);
+err_destroy_flow_steering:
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+ return err;
+}
+
+static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
+{
+ mlx5e_accel_cleanup_rx(priv);
+ mlx5e_tc_nic_cleanup(priv);
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
+ mlx5e_rx_res_destroy(priv->rx_res);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+}
+
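+/* When mqprio runs in channel mode, (re)create the per-TC rate limiter from
+ * the configured max rates; if creation fails, the driver continues with a
+ * NULL rate-limiter handle.
+ */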
+static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params;
+ struct mlx5e_mqprio_rl *rl;
+
+ params = &priv->channels.params;
+ if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL)
+ return;
+
+ rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc,
+ params->mqprio.channel.max_rate);
+ if (IS_ERR(rl))
+ rl = NULL;
+ priv->mqprio_rl = rl;
+ mlx5e_mqprio_rl_update_params(params, rl);
+}
+
+static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_tises(priv);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5e_accel_init_tx(priv);
+ if (err)
+ goto err_destroy_tises;
+
+ mlx5e_set_mqprio_rl(priv);
+ mlx5e_dcbnl_initialize(priv);
+ return 0;
+
+err_destroy_tises:
+ mlx5e_destroy_tises(priv);
+ return err;
+}
+
+static void mlx5e_nic_enable(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ mlx5e_fs_init_l2_addr(priv->fs, netdev);
+
+ err = mlx5e_macsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err);
+
+	/* Mark the link as not currently needed by the driver */
+ if (!netif_running(netdev))
+ mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
+
+ mlx5e_set_netdev_mtu_boundaries(priv);
+ mlx5e_set_dev_port_mtu(priv);
+
+ mlx5_lag_add_netdev(mdev, netdev);
+
+ mlx5e_enable_async_events(priv);
+ mlx5e_enable_blocking_events(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_init(priv);
+
+ mlx5e_hv_vhca_stats_create(priv);
+ if (netdev->reg_state != NETREG_REGISTERED)
+ return;
+ mlx5e_dcbnl_init_app(priv);
+
+ mlx5e_nic_set_rx_mode(priv);
+
+ rtnl_lock();
+ if (netif_running(netdev))
+ mlx5e_open(netdev);
+ udp_tunnel_nic_reset_ntf(priv->netdev);
+ netif_device_attach(netdev);
+ rtnl_unlock();
+}
+
+static void mlx5e_nic_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (priv->netdev->reg_state == NETREG_REGISTERED)
+ mlx5e_dcbnl_delete_app(priv);
+
+ rtnl_lock();
+ if (netif_running(priv->netdev))
+ mlx5e_close(priv->netdev);
+ netif_device_detach(priv->netdev);
+ rtnl_unlock();
+
+ mlx5e_nic_set_rx_mode(priv);
+
+ mlx5e_hv_vhca_stats_destroy(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_cleanup(priv);
+
+ mlx5e_disable_blocking_events(priv);
+ if (priv->en_trap) {
+ mlx5e_deactivate_trap(priv);
+ mlx5e_close_trap(priv->en_trap);
+ priv->en_trap = NULL;
+ }
+ mlx5e_disable_async_events(priv);
+ mlx5_lag_remove_netdev(mdev, priv->netdev);
+ mlx5_vxlan_reset_to_default(mdev->vxlan);
+ mlx5e_macsec_cleanup(priv);
+}
+
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
+{
+ return mlx5e_refresh_tirs(priv, false, false);
+}
+
+static const struct mlx5e_profile mlx5e_nic_profile = {
+ .init = mlx5e_nic_init,
+ .cleanup = mlx5e_nic_cleanup,
+ .init_rx = mlx5e_init_nic_rx,
+ .cleanup_rx = mlx5e_cleanup_nic_rx,
+ .init_tx = mlx5e_init_nic_tx,
+ .cleanup_tx = mlx5e_cleanup_nic_tx,
+ .enable = mlx5e_nic_enable,
+ .disable = mlx5e_nic_disable,
+ .update_rx = mlx5e_update_nic_rx,
+ .update_stats = mlx5e_stats_update_ndo_stats,
+ .update_carrier = mlx5e_update_carrier,
+ .rx_handlers = &mlx5e_rx_handlers_nic,
+ .max_tc = MLX5E_MAX_NUM_TC,
+ .stats_grps = mlx5e_nic_stats_grps,
+ .stats_grps_num = mlx5e_nic_stats_grps_num,
+ .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) |
+ BIT(MLX5E_PROFILE_FEATURE_PTP_TX) |
+ BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) |
+ BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) |
+ BIT(MLX5E_PROFILE_FEATURE_FS_TC),
+};
+
+static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ int nch;
+
+ nch = mlx5e_get_max_num_channels(mdev);
+
+ if (profile->max_nch_limit)
+ nch = min_t(int, nch, profile->max_nch_limit(mdev));
+ return nch;
+}
+
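+/* The effective channel limit is the smallest of: what the core device
+ * exposes (optionally capped by the profile), the netdev RX queue count,
+ * and the netdev TX queue count left after reserving queues for HTB QoS
+ * and PTP, divided by the number of TCs.
+ */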
+static unsigned int
+mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
+ const struct mlx5e_profile *profile)
+
+{
+ unsigned int max_nch, tmp;
+
+ /* core resources */
+ max_nch = mlx5e_profile_max_num_channels(mdev, profile);
+
+ /* netdev rx queues */
+ max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues);
+
+ /* netdev tx queues */
+ tmp = netdev->num_tx_queues;
+ if (mlx5_qos_is_supported(mdev))
+ tmp -= mlx5e_qos_max_leaf_nodes(mdev);
+ if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
+ tmp -= profile->max_tc;
+ tmp = tmp / profile->max_tc;
+ max_nch = min_t(unsigned int, max_nch, tmp);
+
+ return max_nch;
+}
+
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev)
+{
+ /* Indirect TIRS: 2 sets of TTCs (inner + outer steering)
+ * and 1 set of direct TIRS
+ */
+ return 2 * MLX5E_NUM_INDIR_TIRS
+ + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile);
+}
+
+void mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ set_rx_mode_work);
+
+ return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev);
+}
+
+/* mlx5e generic netdev management API (move to en_common.c) */
+int mlx5e_priv_init(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev,
+ struct mlx5_core_dev *mdev)
+{
+ int nch, num_txqs, node;
+ int err;
+
+ num_txqs = netdev->num_tx_queues;
+ nch = mlx5e_calc_max_nch(mdev, netdev, profile);
+ node = dev_to_node(mlx5_core_dma_dev(mdev));
+
+ /* priv init */
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->msglevel = MLX5E_MSG_LEVEL;
+ priv->max_nch = nch;
+ priv->max_opened_tc = 1;
+
+ if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ mutex_init(&priv->state_lock);
+
+ err = mlx5e_selq_init(&priv->selq, &priv->state_lock);
+ if (err)
+ goto err_free_cpumask;
+
+ INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
+ INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+ INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
+ INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+ priv->wq = create_singlethread_workqueue("mlx5e");
+ if (!priv->wq)
+ goto err_free_selq;
+
+ priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node);
+ if (!priv->txq2sq)
+ goto err_destroy_workqueue;
+
+ priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node);
+ if (!priv->tx_rates)
+ goto err_free_txq2sq;
+
+ priv->channel_stats =
+ kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
+ if (!priv->channel_stats)
+ goto err_free_tx_rates;
+
+ return 0;
+
+err_free_tx_rates:
+ kfree(priv->tx_rates);
+err_free_txq2sq:
+ kfree(priv->txq2sq);
+err_destroy_workqueue:
+ destroy_workqueue(priv->wq);
+err_free_selq:
+ mlx5e_selq_cleanup(&priv->selq);
+err_free_cpumask:
+ free_cpumask_var(priv->scratchpad.cpumask);
+ return -ENOMEM;
+}
+
+void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
+{
+ int i;
+
+	/* bail if the profile change failed and the rollback also failed */
+ if (!priv->mdev)
+ return;
+
+ for (i = 0; i < priv->stats_nch; i++)
+ kvfree(priv->channel_stats[i]);
+ kfree(priv->channel_stats);
+ kfree(priv->tx_rates);
+ kfree(priv->txq2sq);
+ destroy_workqueue(priv->wq);
+ mutex_lock(&priv->state_lock);
+ mlx5e_selq_cleanup(&priv->selq);
+ mutex_unlock(&priv->state_lock);
+ free_cpumask_var(priv->scratchpad.cpumask);
+
+ for (i = 0; i < priv->htb_max_qos_sqs; i++)
+ kfree(priv->htb_qos_sq_stats[i]);
+ kvfree(priv->htb_qos_sq_stats);
+
+ memset(priv, 0, sizeof(*priv));
+}
+
+static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ unsigned int nch, ptp_txqs, qos_txqs;
+
+ nch = mlx5e_profile_max_num_channels(mdev, profile);
+
+ ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) &&
+ mlx5e_profile_feature_cap(profile, PTP_TX) ?
+ profile->max_tc : 0;
+
+ qos_txqs = mlx5_qos_is_supported(mdev) &&
+ mlx5e_profile_feature_cap(profile, QOS_HTB) ?
+ mlx5e_qos_max_leaf_nodes(mdev) : 0;
+
+ return nch * profile->max_tc + ptp_txqs + qos_txqs;
+}
+
+static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile)
+{
+ return mlx5e_profile_max_num_channels(mdev, profile);
+}
+
+struct net_device *
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile)
+{
+ struct net_device *netdev;
+ unsigned int txqs, rxqs;
+ int err;
+
+ txqs = mlx5e_get_max_num_txqs(mdev, profile);
+ rxqs = mlx5e_get_max_num_rxqs(mdev, profile);
+
+ netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs);
+ if (!netdev) {
+ mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
+ return NULL;
+ }
+
+ err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
+ goto err_free_netdev;
+ }
+
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+ dev_net_set(netdev, mlx5_core_net(mdev));
+
+ return netdev;
+
+err_free_netdev:
+ free_netdev(netdev);
+
+ return NULL;
+}
+
+static void mlx5e_update_features(struct net_device *netdev)
+{
+ if (netdev->reg_state != NETREG_REGISTERED)
+ return; /* features will be updated on netdev registration */
+
+ rtnl_lock();
+ netdev_update_features(netdev);
+ rtnl_unlock();
+}
+
+static void mlx5e_reset_channels(struct net_device *netdev)
+{
+ netdev_reset_tc(netdev);
+}
+
+int mlx5e_attach_netdev(struct mlx5e_priv *priv)
+{
+ const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
+ const struct mlx5e_profile *profile = priv->profile;
+ int max_nch;
+ int err;
+
+ clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+
+ /* Validate the max_wqe_size_sq capability. */
+ if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n",
+ mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS);
+ return -EIO;
+ }
+
+ /* max number of channels may have changed */
+ max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
+ if (priv->channels.params.num_channels > max_nch) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
+ /* Reducing the number of channels - RXFH has to be reset, and
+ * mlx5e_num_channels_changed below will build the RQT.
+ */
+ priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ priv->channels.params.num_channels = max_nch;
+ if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n");
+ mlx5e_params_mqprio_reset(&priv->channels.params);
+ }
+ }
+ if (max_nch != priv->max_nch) {
+ mlx5_core_warn(priv->mdev,
+ "MLX5E: Updating max number of channels from %u to %u\n",
+ priv->max_nch, max_nch);
+ priv->max_nch = max_nch;
+ }
+
+ /* 1. Set the real number of queues in the kernel the first time.
+ * 2. Set our default XPS cpumask.
+ * 3. Build the RQT.
+ *
+ * rtnl_lock is required by netif_set_real_num_*_queues in case the
+ * netdev has been registered by this point (if this function was called
+ * in the reload or resume flow).
+ */
+ if (take_rtnl)
+ rtnl_lock();
+ err = mlx5e_num_channels_changed(priv);
+ if (take_rtnl)
+ rtnl_unlock();
+ if (err)
+ goto out;
+
+ err = profile->init_tx(priv);
+ if (err)
+ goto out;
+
+ err = profile->init_rx(priv);
+ if (err)
+ goto err_cleanup_tx;
+
+ if (profile->enable)
+ profile->enable(priv);
+
+ mlx5e_update_features(priv->netdev);
+
+ return 0;
+
+err_cleanup_tx:
+ profile->cleanup_tx(priv);
+
+out:
+ mlx5e_reset_channels(priv->netdev);
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ cancel_work_sync(&priv->update_stats_work);
+ return err;
+}
+
+void mlx5e_detach_netdev(struct mlx5e_priv *priv)
+{
+ const struct mlx5e_profile *profile = priv->profile;
+
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ if (priv->fs)
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+
+ if (profile->disable)
+ profile->disable(priv);
+ flush_workqueue(priv->wq);
+
+ profile->cleanup_rx(priv);
+ profile->cleanup_tx(priv);
+ mlx5e_reset_channels(priv->netdev);
+ cancel_work_sync(&priv->update_stats_work);
+}
+
+static int
+mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *new_profile, void *new_ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ err = mlx5e_priv_init(priv, new_profile, netdev, mdev);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
+ return err;
+ }
+ netif_carrier_off(netdev);
+ priv->profile = new_profile;
+ priv->ppriv = new_ppriv;
+ err = new_profile->init(priv->mdev, priv->netdev);
+ if (err)
+ goto priv_cleanup;
+
+ return 0;
+
+priv_cleanup:
+ mlx5e_priv_cleanup(priv);
+ return err;
+}
+
+static int
+mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *new_profile, void *new_ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv);
+ if (err)
+ return err;
+
+ err = mlx5e_attach_netdev(priv);
+ if (err)
+ goto profile_cleanup;
+ return err;
+
+profile_cleanup:
+ new_profile->cleanup(priv);
+ mlx5e_priv_cleanup(priv);
+ return err;
+}
+
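+/* Swap the netdev to a new profile: detach and clean up the current one,
+ * then init and attach the new profile. If the device is in internal error
+ * state, only the new profile's private state is initialized (no attach).
+ * If attaching the new profile fails, the original profile is restored.
+ */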
+int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
+ const struct mlx5e_profile *new_profile, void *new_ppriv)
+{
+ const struct mlx5e_profile *orig_profile = priv->profile;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *orig_ppriv = priv->ppriv;
+ int err, rollback_err;
+
+ /* cleanup old profile */
+ mlx5e_detach_netdev(priv);
+ priv->profile->cleanup(priv);
+ mlx5e_priv_cleanup(priv);
+
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv);
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ return -EIO;
+ }
+
+ err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
+ if (err) { /* roll back to original profile */
+ netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv);
+ if (rollback_err)
+ netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n",
+ __func__, rollback_err);
+ return err;
+}
+
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
+{
+ mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
+}
+
+void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+
+ mlx5e_priv_cleanup(priv);
+ free_netdev(netdev);
+}
+
+static int mlx5e_resume(struct auxiliary_device *adev)
+{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = edev->mdev;
+ int err;
+
+ if (netif_device_present(netdev))
+ return 0;
+
+ err = mlx5e_create_mdev_resources(mdev);
+ if (err)
+ return err;
+
+ err = mlx5e_attach_netdev(priv);
+ if (err) {
+ mlx5e_destroy_mdev_resources(mdev);
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
+{
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!netif_device_present(netdev)) {
+ if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
+ mlx5e_destroy_mdev_resources(mdev);
+ return -ENODEV;
+ }
+
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_mdev_resources(mdev);
+ return 0;
+}
+
+static int mlx5e_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ const struct mlx5e_profile *profile = &mlx5e_nic_profile;
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct net_device *netdev;
+ pm_message_t state = {};
+ struct mlx5e_priv *priv;
+ int err;
+
+ netdev = mlx5e_create_netdev(mdev, profile);
+ if (!netdev) {
+ mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
+ return -ENOMEM;
+ }
+
+ mlx5e_build_nic_netdev(netdev);
+
+ priv = netdev_priv(netdev);
+ auxiliary_set_drvdata(adev, priv);
+
+ priv->profile = profile;
+ priv->ppriv = NULL;
+
+ err = mlx5e_devlink_port_register(priv);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
+ goto err_destroy_netdev;
+ }
+
+ err = profile->init(mdev, netdev);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
+ goto err_devlink_cleanup;
+ }
+
+ err = mlx5e_resume(adev);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err);
+ goto err_profile_cleanup;
+ }
+
+ err = register_netdev(netdev);
+ if (err) {
+ mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
+ goto err_resume;
+ }
+
+ mlx5e_devlink_port_type_eth_set(priv);
+
+ mlx5e_dcbnl_init_app(priv);
+ mlx5_uplink_netdev_set(mdev, netdev);
+ return 0;
+
+err_resume:
+ mlx5e_suspend(adev, state);
+err_profile_cleanup:
+ profile->cleanup(priv);
+err_devlink_cleanup:
+ mlx5e_devlink_port_unregister(priv);
+err_destroy_netdev:
+ mlx5e_destroy_netdev(priv);
+ return err;
+}
+
+static void mlx5e_remove(struct auxiliary_device *adev)
+{
+ struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
+ pm_message_t state = {};
+
+ mlx5e_dcbnl_delete_app(priv);
+ unregister_netdev(priv->netdev);
+ mlx5e_suspend(adev, state);
+ priv->profile->cleanup(priv);
+ mlx5e_devlink_port_unregister(priv);
+ mlx5e_destroy_netdev(priv);
+}
+
+static const struct auxiliary_device_id mlx5e_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".eth", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table);
+
+static struct auxiliary_driver mlx5e_driver = {
+ .name = "eth",
+ .probe = mlx5e_probe,
+ .remove = mlx5e_remove,
+ .suspend = mlx5e_suspend,
+ .resume = mlx5e_resume,
+ .id_table = mlx5e_id_table,
+};
+
+int mlx5e_init(void)
+{
+ int ret;
+
+ mlx5e_build_ptys2ethtool_map();
+ ret = auxiliary_driver_register(&mlx5e_driver);
+ if (ret)
+ return ret;
+
+ ret = mlx5e_rep_init();
+ if (ret)
+ auxiliary_driver_unregister(&mlx5e_driver);
+ return ret;
+}
+
+void mlx5e_cleanup(void)
+{
+ mlx5e_rep_cleanup();
+ auxiliary_driver_unregister(&mlx5e_driver);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
new file mode 100644
index 000000000..5aeca9534
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -0,0 +1,1521 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/fs.h>
+#include <net/switchdev.h>
+#include <net/pkt_cls.h>
+#include <net/act_api.h>
+#include <net/devlink.h>
+#include <net/ipv6_stubs.h>
+
+#include "eswitch.h"
+#include "en.h"
+#include "en_rep.h"
+#include "en/params.h"
+#include "en/txrx.h"
+#include "en_tc.h"
+#include "en/rep/tc.h"
+#include "en/rep/neigh.h"
+#include "en/rep/bridge.h"
+#include "en/devlink.h"
+#include "fs_core.h"
+#include "lib/mlx5.h"
+#include "lib/devcom.h"
+#include "lib/vxlan.h"
+#define CREATE_TRACE_POINTS
+#include "diag/en_rep_tracepoint.h"
+#include "en_accel/ipsec.h"
+#include "en/tc/int_port.h"
+#include "en/ptp.h"
+#include "en/fs_ethtool.h"
+
+#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
+ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
+
+static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
+
+static void mlx5e_rep_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int count;
+
+ strscpy(drvinfo->driver, mlx5e_rep_driver_name,
+ sizeof(drvinfo->driver));
+ count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+ if (count >= sizeof(drvinfo->fw_version))
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev));
+}
+
+static const struct counter_desc sw_rep_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+};
+
+struct vport_stats {
+ u64 vport_rx_packets;
+ u64 vport_tx_packets;
+ u64 vport_rx_bytes;
+ u64 vport_tx_bytes;
+};
+
+static const struct counter_desc vport_rep_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) },
+};
+
+#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
+#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep)
+{
+ return NUM_VPORT_REP_SW_COUNTERS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw_rep)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ sw_rep_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw_rep)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
+ sw_rep_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep)
+{
+ struct mlx5e_sw_stats *s = &priv->stats.sw;
+ struct rtnl_link_stats64 stats64 = {};
+
+ memset(s, 0, sizeof(*s));
+ mlx5e_fold_sw_stats64(priv, &stats64);
+
+ s->rx_packets = stats64.rx_packets;
+ s->rx_bytes = stats64.rx_bytes;
+ s->tx_packets = stats64.tx_packets;
+ s->tx_bytes = stats64.tx_bytes;
+ s->tx_queue_dropped = stats64.tx_dropped;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep)
+{
+ return NUM_VPORT_REP_HW_COUNTERS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport,
+ vport_rep_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct rtnl_link_stats64 *vport_stats;
+ struct ifla_vf_stats vf_stats;
+ int err;
+
+ err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
+ if (err) {
+ netdev_warn(priv->netdev, "vport %d error %d reading stats\n",
+ rep->vport, err);
+ return;
+ }
+
+ vport_stats = &priv->stats.vf_vport;
+ /* flip tx/rx as we are reporting the counters for the switch vport */
+ vport_stats->rx_packets = vf_stats.tx_packets;
+ vport_stats->rx_bytes = vf_stats.tx_bytes;
+ vport_stats->tx_packets = vf_stats.rx_packets;
+ vport_stats->tx_bytes = vf_stats.rx_bytes;
+}
+
+static void mlx5e_rep_get_strings(struct net_device *dev,
+ u32 stringset, uint8_t *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ mlx5e_stats_fill_strings(priv, data);
+ break;
+ }
+}
+
+static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
+}
+
+static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ return mlx5e_stats_total_num(priv);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void
+mlx5e_rep_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
+}
+
+static int
+mlx5e_rep_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+static void mlx5e_rep_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+static int mlx5e_rep_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_channels(priv, ch);
+}
+
+static int mlx5e_rep_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
+}
+
+static int mlx5e_rep_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
+}
+
+static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_rxfh_key_size(priv);
+}
+
+static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_rxfh_indir_size(priv);
+}
+
+static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
+ .get_drvinfo = mlx5e_rep_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_rep_get_strings,
+ .get_sset_count = mlx5e_rep_get_sset_count,
+ .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+ .get_ringparam = mlx5e_rep_get_ringparam,
+ .set_ringparam = mlx5e_rep_set_ringparam,
+ .get_channels = mlx5e_rep_get_channels,
+ .set_channels = mlx5e_rep_set_channels,
+ .get_coalesce = mlx5e_rep_get_coalesce,
+ .set_coalesce = mlx5e_rep_set_coalesce,
+ .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
+};
+
+static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_sq *rep_sq, *tmp;
+ struct mlx5e_rep_priv *rpriv;
+
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ if (rep_sq->send_to_vport_rule_peer)
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+ list_del(&rep_sq->list);
+ kfree(rep_sq);
+ }
+}
+
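+/* For every SQN in @sqns_array, add a send-to-vport rule so that traffic sent
+ * on the representor's SQs is forwarded to the represented vport. When the
+ * eswitch is paired via devcom, mirror each rule on the peer eswitch as well.
+ * On failure, all rules added so far are removed.
+ */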
+static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ u32 *sqns_array, int sqns_num)
+{
+ struct mlx5_eswitch *peer_esw = NULL;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+ int err;
+ int i;
+
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
+ for (i = 0; i < sqns_num; i++) {
+ rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
+ if (!rep_sq) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ /* Add re-inject rule to the PF/representor sqs */
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
+ sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule = flow_rule;
+ rep_sq->sqn = sqns_array[i];
+
+ if (peer_esw) {
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+ rep, sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
+ list_add(&rep_sq->list, &rpriv->vport_sqs_list);
+ }
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+ return 0;
+
+out_err:
+ mlx5e_sqs2vport_stop(esw, rep);
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+ return err;
+}
+
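+/* Gather the SQNs of all TX queues owned by this representor (per-TC SQs,
+ * the XDP SQs on the uplink rep and the PTP SQs when enabled) and install
+ * the matching send-to-vport rules.
+ */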
+static int
+mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
+{
+ int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool is_uplink_rep = mlx5e_is_uplink_rep(priv);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int n, tc, nch, num_sqs = 0;
+ struct mlx5e_channel *c;
+ int err = -ENOMEM;
+ bool ptp_sq;
+ u32 *sqs;
+
+ ptp_sq = !!(priv->channels.ptp &&
+ MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS));
+ nch = priv->channels.num + ptp_sq;
+ /* +2 for the xdpsqs; they don't exist on the PTP channel, but the
+ * unused slots are simply not counted in num_sqs.
+ */
+ if (is_uplink_rep)
+ sqs_per_channel += 2;
+
+ sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL);
+ if (!sqs)
+ goto out;
+
+ for (n = 0; n < priv->channels.num; n++) {
+ c = priv->channels.c[n];
+ for (tc = 0; tc < c->num_tc; tc++)
+ sqs[num_sqs++] = c->sq[tc].sqn;
+
+ if (is_uplink_rep) {
+ if (c->xdp)
+ sqs[num_sqs++] = c->rq_xdpsq.sqn;
+
+ sqs[num_sqs++] = c->xdpsq.sqn;
+ }
+ }
+ if (ptp_sq) {
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+
+ for (tc = 0; tc < ptp_ch->num_tc; tc++)
+ sqs[num_sqs++] = ptp_ch->ptpsq[tc].txqsq.sqn;
+ }
+
+ err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
+ kvfree(sqs);
+
+out:
+ if (err)
+ netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err);
+ return err;
+}
+
+static void
+mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ mlx5e_sqs2vport_stop(esw, rep);
+}
+
+static int
+mlx5e_rep_add_meta_tunnel_rule(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_group *g;
+
+ g = esw->fdb_table.offloads.send_to_vport_meta_grp;
+ if (!g)
+ return 0;
+
+ flow_rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, rep->vport);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+
+ rpriv->send_to_vport_meta_rule = flow_rule;
+
+ return 0;
+}
+
+static void
+mlx5e_rep_del_meta_tunnel_rule(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (rpriv->send_to_vport_meta_rule)
+ mlx5_eswitch_del_send_to_vport_meta_rule(rpriv->send_to_vport_meta_rule);
+}
+
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_add_sqs_fwd_rules(priv);
+ mlx5e_rep_add_meta_tunnel_rule(priv);
+}
+
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_rep_del_meta_tunnel_rule(priv);
+ mlx5e_remove_sqs_fwd_rules(priv);
+}
+
+static int mlx5e_rep_open(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_open_locked(dev);
+ if (err)
+ goto unlock;
+
+ if (!mlx5_modify_vport_admin_state(priv->mdev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, 1,
+ MLX5_VPORT_ADMIN_STATE_UP))
+ netif_carrier_on(dev);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_rep_close(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int ret;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_modify_vport_admin_state(priv->mdev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, 1,
+ MLX5_VPORT_ADMIN_STATE_DOWN);
+ ret = mlx5e_close_locked(dev);
+ mutex_unlock(&priv->state_lock);
+ return ret;
+}
+
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+
+ if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ return false;
+
+ if (!rpriv) /* non vport rep mlx5e instances don't use this field */
+ return false;
+
+ rep = rpriv->rep;
+ return (rep->vport == MLX5_VPORT_UPLINK);
+}
+
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ return true;
+ }
+
+ return false;
+}
+
+static int
+mlx5e_get_sw_stats64(const struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_fold_sw_stats64(priv, stats);
+ return 0;
+}
+
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
+{
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ return mlx5e_get_sw_stats64(dev, sp);
+ }
+
+ return -EINVAL;
+}
+
+static void
+mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ /* update HW stats in background for next time */
+ mlx5e_queue_update_stats(priv);
+ memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
+}
+
+static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, NULL);
+}
+
+static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_core_dev *dev = priv->mdev;
+
+ return mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+}
+
+static int mlx5e_rep_change_carrier(struct net_device *dev, bool new_carrier)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int err;
+
+ if (new_carrier) {
+ err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, 1, MLX5_VPORT_ADMIN_STATE_UP);
+ if (err)
+ return err;
+ netif_carrier_on(dev);
+ } else {
+ err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, 1, MLX5_VPORT_ADMIN_STATE_DOWN);
+ if (err)
+ return err;
+ netif_carrier_off(dev);
+ }
+ return 0;
+}
+
+static const struct net_device_ops mlx5e_netdev_ops_rep = {
+ .ndo_open = mlx5e_rep_open,
+ .ndo_stop = mlx5e_rep_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_setup_tc = mlx5e_rep_setup_tc,
+ .ndo_get_devlink_port = mlx5e_rep_get_devlink_port,
+ .ndo_get_stats64 = mlx5e_rep_get_stats,
+ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
+ .ndo_change_mtu = mlx5e_rep_change_mtu,
+ .ndo_change_carrier = mlx5e_rep_change_carrier,
+};
+
+bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev)
+{
+ return netdev->netdev_ops == &mlx5e_netdev_ops &&
+ mlx5e_is_uplink_rep(netdev_priv(netdev));
+}
+
+bool mlx5e_eswitch_vf_rep(const struct net_device *netdev)
+{
+ return netdev->netdev_ops == &mlx5e_netdev_ops_rep;
+}
+
+/* One indirect TIR set for outer. Inner not supported in reps. */
+#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS
+
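+/* Bound the number of channels per representor by the TIR budget: from the
+ * device's maximum number of TIRs, subtract the PF TIRs and one indirect TIR
+ * set per vport, and split the remainder evenly among the vports.
+ */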
+static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev)
+{
+ int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir);
+ int num_vports = mlx5_eswitch_get_total_vports(mdev);
+
+ return (max_tir_num - mlx5e_get_pf_num_tirs(mdev)
+ - (num_vports * REP_NUM_INDIR_TIRS)) / num_vports;
+}
+
+static void mlx5e_build_rep_params(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params *params;
+
+ u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+
+ params = &priv->channels.params;
+
+ params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS;
+ params->hard_mtu = MLX5E_ETH_HARD_MTU;
+ params->sw_mtu = netdev->mtu;
+
+ /* SQ */
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ else
+ params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
+
+ /* RQ */
+ mlx5e_build_rq_params(mdev, params);
+
+ /* CQ moderation params */
+ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
+
+ params->mqprio.num_tc = 1;
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ params->vlan_strip_disable = true;
+
+ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+}
+
+static void mlx5e_build_rep_netdev(struct net_device *netdev,
+ struct mlx5_core_dev *mdev)
+{
+ SET_NETDEV_DEV(netdev, mdev->device);
+ netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+ eth_hw_addr_random(netdev);
+ netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
+
+ netdev->watchdog_timeo = 15 * HZ;
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+ netdev->hw_features |= NETIF_F_HW_TC;
+#endif
+ netdev->hw_features |= NETIF_F_SG;
+ netdev->hw_features |= NETIF_F_IP_CSUM;
+ netdev->hw_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_features |= NETIF_F_GRO;
+ netdev->hw_features |= NETIF_F_TSO;
+ netdev->hw_features |= NETIF_F_TSO6;
+ netdev->hw_features |= NETIF_F_RXCSUM;
+
+ netdev->features |= netdev->hw_features;
+ netdev->features |= NETIF_F_NETNS_LOCAL;
+}
+
+static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
+
+ mlx5e_build_rep_params(netdev);
+ mlx5e_timestamp_init(priv);
+
+ return 0;
+}
+
+static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
+
+ err = mlx5e_ipsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err);
+
+ mlx5e_vxlan_set_netdev_info(priv);
+ mlx5e_build_rep_params(netdev);
+ mlx5e_timestamp_init(priv);
+ return 0;
+}
+
+static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
+{
+ mlx5e_fs_cleanup(priv->fs);
+ mlx5e_ipsec_cleanup(priv);
+ priv->fs = NULL;
+}
+
+static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct ttc_params ttc_params = {};
+ int err;
+
+ mlx5e_fs_set_ns(priv->fs,
+ mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL), false);
+
+ /* The inner_ttc in the ttc params is intentionally not set */
+ mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false);
+
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ /* To give uplink rep TTC a lower level for chaining from root ft */
+ ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
+
+ mlx5e_fs_set_ttc(priv->fs, mlx5_create_ttc_table(priv->mdev, &ttc_params), false);
+ if (IS_ERR(mlx5e_fs_get_ttc(priv->fs, false))) {
+ err = PTR_ERR(mlx5e_fs_get_ttc(priv->fs, false));
+ netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
+ err);
+ return err;
+ }
+ return 0;
+}
+
+static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
+ if (rep->vport != MLX5_VPORT_UPLINK) {
+ /* non-uplink reps skip any bypass tables and go directly to
+ * their own ttc
+ */
+ rpriv->root_ft = mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(priv->fs, false));
+ return 0;
+ }
+
+ /* the uplink root ft is used to auto-chain to the ethtool or ttc tables */
+ ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+ if (!ns) {
+ netdev_err(priv->netdev, "Failed to get reps offloads namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.max_fte = 0; /* Empty table, miss rule will always point to next table */
+ ft_attr.prio = 1;
+ ft_attr.level = 1;
+
+ rpriv->root_ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(rpriv->root_ft)) {
+ err = PTR_ERR(rpriv->root_ft);
+ rpriv->root_ft = NULL;
+ }
+
+ return err;
+}
+
+static void mlx5e_destroy_rep_root_ft(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ return;
+ mlx5_destroy_flow_table(rpriv->root_ft);
+}
+
+static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_destination dest;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = rpriv->root_ft;
+
+ flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, &dest);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ rpriv->vport_rx_rule = flow_rule;
+ return 0;
+}
+
+static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (!rpriv->vport_rx_rule)
+ return;
+
+ mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ rpriv->vport_rx_rule = NULL;
+}
+
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
+{
+ rep_vport_rx_rule_destroy(priv);
+
+ return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv);
+}
+
+static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res) {
+ err = -ENOMEM;
+ goto err_free_fs;
+ }
+
+ mlx5e_fs_init_l2_addr(priv->fs, priv->netdev);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_rx_res_free;
+ }
+
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
+ if (err)
+ goto err_close_drop_rq;
+
+ err = mlx5e_create_rep_ttc_table(priv);
+ if (err)
+ goto err_destroy_rx_res;
+
+ err = mlx5e_create_rep_root_ft(priv);
+ if (err)
+ goto err_destroy_ttc_table;
+
+ err = mlx5e_create_rep_vport_rx_rule(priv);
+ if (err)
+ goto err_destroy_root_ft;
+
+ mlx5e_ethtool_init_steering(priv->fs);
+
+ return 0;
+
+err_destroy_root_ft:
+ mlx5e_destroy_rep_root_ft(priv);
+err_destroy_ttc_table:
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+err_rx_res_free:
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+err_free_fs:
+ mlx5e_fs_cleanup(priv->fs);
+ priv->fs = NULL;
+ return err;
+}
+
+static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+ mlx5e_ethtool_cleanup_steering(priv->fs);
+ rep_vport_rx_rule_destroy(priv);
+ mlx5e_destroy_rep_root_ft(priv);
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
+ mlx5e_rx_res_destroy(priv->rx_res);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+}
+
+static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ mlx5e_create_q_counters(priv);
+ err = mlx5e_init_rep_rx(priv);
+ if (err)
+ goto out;
+
+ mlx5e_tc_int_port_init_rep_rx(priv);
+
+out:
+ return err;
+}
+
+static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv)
+{
+ mlx5e_tc_int_port_cleanup_rep_rx(priv);
+ mlx5e_cleanup_rep_rx(priv);
+ mlx5e_destroy_q_counters(priv);
+}
+
+static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ int err;
+
+ netdev = rpriv->netdev;
+ priv = netdev_priv(netdev);
+ uplink_priv = &rpriv->uplink_priv;
+
+ err = mlx5e_rep_tc_init(rpriv);
+ if (err)
+ return err;
+
+ mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
+
+ mlx5e_rep_bond_init(rpriv);
+ err = mlx5e_rep_tc_netdevice_event_register(rpriv);
+ if (err) {
+ mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
+ err);
+ goto err_event_reg;
+ }
+
+ return 0;
+
+err_event_reg:
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
+ return err;
+}
+
+static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
+{
+ mlx5e_rep_tc_netdevice_event_unregister(rpriv);
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
+}
+
+static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = mlx5e_create_tises(priv);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err)
+ goto err_neigh_init;
+
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
+ err = mlx5e_init_uplink_rep_tx(rpriv);
+ if (err)
+ goto err_init_tx;
+ }
+
+ err = mlx5e_tc_ht_init(&rpriv->tc_ht);
+ if (err)
+ goto err_ht_init;
+
+ return 0;
+
+err_ht_init:
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_cleanup_uplink_rep_tx(rpriv);
+err_init_tx:
+ mlx5e_rep_neigh_cleanup(rpriv);
+err_neigh_init:
+ mlx5e_destroy_tises(priv);
+ return err;
+}
+
+static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ mlx5e_tc_ht_cleanup(&rpriv->tc_ht);
+
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_cleanup_uplink_rep_tx(rpriv);
+
+ mlx5e_rep_neigh_cleanup(rpriv);
+ mlx5e_destroy_tises(priv);
+}
+
+static void mlx5e_rep_enable(struct mlx5e_priv *priv)
+{
+ mlx5e_set_netdev_mtu_boundaries(priv);
+}
+
+static void mlx5e_rep_disable(struct mlx5e_priv *priv)
+{
+}
+
+static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
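+/* Async events on the uplink rep: port up/down changes schedule a carrier
+ * update, and port affinity changes are handed to the rep TC code.
+ */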
+static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+
+ if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+ struct mlx5_eqe *eqe = data;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+ }
+
+ if (event == MLX5_DEV_EVENT_PORT_AFFINITY)
+ return mlx5e_rep_tc_event_port_affinity(priv);
+
+ return NOTIFY_DONE;
+}
+
+static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_dev_port_mtu(priv);
+
+ mlx5e_rep_tc_enable(priv);
+
+ if (MLX5_CAP_GEN(mdev, uplink_follow))
+ mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
+ 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
+ mlx5_lag_add_netdev(mdev, netdev);
+ priv->events_nb.notifier_call = uplink_rep_async_event;
+ mlx5_notifier_register(mdev, &priv->events_nb);
+ mlx5e_dcbnl_initialize(priv);
+ mlx5e_dcbnl_init_app(priv);
+ mlx5e_rep_bridge_init(priv);
+
+ netdev->wanted_features |= NETIF_F_HW_TC;
+
+ rtnl_lock();
+ if (netif_running(netdev))
+ mlx5e_open(netdev);
+ udp_tunnel_nic_reset_ntf(priv->netdev);
+ netif_device_attach(netdev);
+ rtnl_unlock();
+}
+
+static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ rtnl_lock();
+ if (netif_running(priv->netdev))
+ mlx5e_close(priv->netdev);
+ netif_device_detach(priv->netdev);
+ rtnl_unlock();
+
+ mlx5e_rep_bridge_cleanup(priv);
+ mlx5e_dcbnl_delete_app(priv);
+ mlx5_notifier_unregister(mdev, &priv->events_nb);
+ mlx5e_rep_tc_disable(priv);
+ mlx5_lag_remove_netdev(mdev, priv->netdev);
+ mlx5_vxlan_reset_to_default(mdev->vxlan);
+}
+
+static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
+static MLX5E_DEFINE_STATS_GRP(vport_rep, MLX5E_NDO_UPDATE_STATS);
+
+/* The stats groups order is opposite to the order of the update_stats() calls */
+static mlx5e_stats_grp_t mlx5e_rep_stats_grps[] = {
+ &MLX5E_STATS_GRP(sw_rep),
+ &MLX5E_STATS_GRP(vport_rep),
+};
+
+static unsigned int mlx5e_rep_stats_grps_num(struct mlx5e_priv *priv)
+{
+ return ARRAY_SIZE(mlx5e_rep_stats_grps);
+}
+
+/* The stats groups order is opposite to the order of the update_stats() calls */
+static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
+ &MLX5E_STATS_GRP(sw),
+ &MLX5E_STATS_GRP(qcnt),
+ &MLX5E_STATS_GRP(vnic_env),
+ &MLX5E_STATS_GRP(vport),
+ &MLX5E_STATS_GRP(802_3),
+ &MLX5E_STATS_GRP(2863),
+ &MLX5E_STATS_GRP(2819),
+ &MLX5E_STATS_GRP(phy),
+ &MLX5E_STATS_GRP(eth_ext),
+ &MLX5E_STATS_GRP(pcie),
+ &MLX5E_STATS_GRP(per_prio),
+ &MLX5E_STATS_GRP(pme),
+ &MLX5E_STATS_GRP(channels),
+ &MLX5E_STATS_GRP(per_port_buff_congest),
+#ifdef CONFIG_MLX5_EN_IPSEC
+ &MLX5E_STATS_GRP(ipsec_sw),
+#endif
+ &MLX5E_STATS_GRP(ptp),
+};
+
+static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
+{
+ return ARRAY_SIZE(mlx5e_ul_rep_stats_grps);
+}
+
+static const struct mlx5e_profile mlx5e_rep_profile = {
+ .init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_rep_enable,
+ .disable = mlx5e_rep_disable,
+ .update_rx = mlx5e_update_rep_rx,
+ .update_stats = mlx5e_stats_update_ndo_stats,
+ .rx_handlers = &mlx5e_rx_handlers_rep,
+ .max_tc = 1,
+ .stats_grps = mlx5e_rep_stats_grps,
+ .stats_grps_num = mlx5e_rep_stats_grps_num,
+ .max_nch_limit = mlx5e_rep_max_nch_limit,
+};
+
+static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
+ .init = mlx5e_init_ul_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_ul_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_ul_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_uplink_rep_enable,
+ .disable = mlx5e_uplink_rep_disable,
+ .update_rx = mlx5e_update_rep_rx,
+ .update_stats = mlx5e_stats_update_ndo_stats,
+ .update_carrier = mlx5e_update_carrier,
+ .rx_handlers = &mlx5e_rx_handlers_rep,
+ .max_tc = MLX5E_MAX_NUM_TC,
+ .stats_grps = mlx5e_ul_rep_stats_grps,
+ .stats_grps_num = mlx5e_ul_rep_stats_grps_num,
+};
+
+/* e-Switch vport representors */
+static int
+mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev));
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct devlink_port *dl_port;
+ int err;
+
+ rpriv->netdev = priv->netdev;
+
+ err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile,
+ rpriv);
+ if (err)
+ return err;
+
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_eth_set(dl_port, rpriv->netdev);
+
+ return 0;
+}
+
+static void
+mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv)
+{
+ struct net_device *netdev = rpriv->netdev;
+ struct devlink_port *dl_port;
+ struct mlx5_core_dev *dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+ dev = priv->mdev;
+
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_clear(dl_port);
+ mlx5e_netdev_attach_nic_profile(priv);
+}
+
+static int
+mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ const struct mlx5e_profile *profile;
+ struct devlink_port *dl_port;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ int err;
+
+ profile = &mlx5e_rep_profile;
+ netdev = mlx5e_create_netdev(dev, profile);
+ if (!netdev) {
+ mlx5_core_warn(dev,
+ "Failed to create representor netdev for vport %d\n",
+ rep->vport);
+ return -EINVAL;
+ }
+
+ mlx5e_build_rep_netdev(netdev, dev);
+ rpriv->netdev = netdev;
+
+ priv = netdev_priv(netdev);
+ priv->profile = profile;
+ priv->ppriv = rpriv;
+ err = profile->init(dev, netdev);
+ if (err) {
+ netdev_warn(netdev, "rep profile init failed, %d\n", err);
+ goto err_destroy_netdev;
+ }
+
+ err = mlx5e_attach_netdev(netdev_priv(netdev));
+ if (err) {
+ netdev_warn(netdev,
+ "Failed to attach representor netdev for vport %d\n",
+ rep->vport);
+ goto err_cleanup_profile;
+ }
+
+ err = register_netdev(netdev);
+ if (err) {
+ netdev_warn(netdev,
+ "Failed to register representor netdev for vport %d\n",
+ rep->vport);
+ goto err_detach_netdev;
+ }
+
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_eth_set(dl_port, netdev);
+ return 0;
+
+err_detach_netdev:
+ mlx5e_detach_netdev(netdev_priv(netdev));
+
+err_cleanup_profile:
+ priv->profile->cleanup(priv);
+
+err_destroy_netdev:
+ mlx5e_destroy_netdev(netdev_priv(netdev));
+ return err;
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+ int err;
+
+ rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return -ENOMEM;
+
+ /* rpriv->rep to be looked up when profile->init() is called */
+ rpriv->rep = rep;
+ rep->rep_data[REP_ETH].priv = rpriv;
+ INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ err = mlx5e_vport_uplink_rep_load(dev, rep);
+ else
+ err = mlx5e_vport_vf_rep_load(dev, rep);
+
+ if (err)
+ kvfree(rpriv);
+
+ return err;
+}
+
+static void
+mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct devlink_port *dl_port;
+ void *ppriv = priv->ppriv;
+
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ mlx5e_vport_uplink_rep_unload(rpriv);
+ goto free_ppriv;
+ }
+
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_clear(dl_port);
+ unregister_netdev(netdev);
+ mlx5e_detach_netdev(priv);
+ priv->profile->cleanup(priv);
+ mlx5e_destroy_netdev(priv);
+free_ppriv:
+ kvfree(ppriv); /* mlx5e_rep_priv */
+}
+
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
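+/* Devcom unpair: remove the send-to-vport rules that were mirrored on the
+ * peer eswitch for this representor's SQs.
+ */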
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (!rep_sq->send_to_vport_rule_peer)
+ continue;
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+ rep_sq->send_to_vport_rule_peer = NULL;
+ }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ struct mlx5_eswitch *peer_esw)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (rep_sq->send_to_vport_rule_peer)
+ continue;
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+ if (IS_ERR(flow_rule))
+ goto err_out;
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
+ return 0;
+err_out:
+ mlx5e_vport_rep_event_unpair(rep);
+ return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ enum mlx5_switchdev_event event,
+ void *data)
+{
+ int err = 0;
+
+ if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+ err = mlx5e_vport_rep_event_pair(esw, rep, data);
+ else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+ mlx5e_vport_rep_event_unpair(rep);
+
+ return err;
+}
+
+static const struct mlx5_eswitch_rep_ops rep_ops = {
+ .load = mlx5e_vport_rep_load,
+ .unload = mlx5e_vport_rep_unload,
+ .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+ .event = mlx5e_vport_rep_event,
+};
+
+static int mlx5e_rep_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH);
+ return 0;
+}
+
+static void mlx5e_rep_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_adev *vdev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = vdev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_ETH);
+}
+
+static const struct auxiliary_device_id mlx5e_rep_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".eth-rep", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5e_rep_id_table);
+
+static struct auxiliary_driver mlx5e_rep_driver = {
+ .name = "eth-rep",
+ .probe = mlx5e_rep_probe,
+ .remove = mlx5e_rep_remove,
+ .id_table = mlx5e_rep_id_table,
+};
+
+int mlx5e_rep_init(void)
+{
+ return auxiliary_driver_register(&mlx5e_rep_driver);
+}
+
+void mlx5e_rep_cleanup(void)
+{
+ auxiliary_driver_unregister(&mlx5e_rep_driver);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
new file mode 100644
index 000000000..b4e691760
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_REP_H__
+#define __MLX5E_REP_H__
+
+#include <net/ip_tunnels.h>
+#include <linux/rhashtable.h>
+#include <linux/mutex.h>
+#include "eswitch.h"
+#include "en.h"
+#include "lib/port_tun.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep;
+
+struct mlx5e_neigh_update_table {
+ struct rhashtable neigh_ht;
+ /* Save the neigh hash entries in a list in addition to the hash table
+ * (neigh_ht) so that the neigh entries can be iterated over easily.
+ * Used for stats query.
+ */
+ struct list_head neigh_list;
+ /* protect lookup/remove operations */
+ struct mutex encap_lock;
+ struct notifier_block netevent_nb;
+ struct delayed_work neigh_stats_work;
+ unsigned long min_interval; /* jiffies */
+};
+
+struct mlx5_tc_ct_priv;
+struct mlx5_tc_int_port_priv;
+struct mlx5e_rep_bond;
+struct mlx5e_tc_tun_encap;
+struct mlx5e_post_act;
+struct mlx5e_flow_meters;
+
+struct mlx5_rep_uplink_priv {
+ /* indirect block callbacks are invoked on bind/unbind events
+ * on registered higher level devices (e.g. tunnel devices)
+ *
+ * tc_indr_block_cb_priv_list is used to look up indirect callback
+ * private data
+ */
+ struct list_head tc_indr_block_priv_list;
+
+ struct mlx5_tun_entropy tun_entropy;
+
+ /* protects unready_flows */
+ struct mutex unready_flows_lock;
+ struct list_head unready_flows;
+ struct work_struct reoffload_flows_work;
+
+ /* maps tun_info to a unique id */
+ struct mapping_ctx *tunnel_mapping;
+ /* maps tun_enc_opts to a unique id */
+ struct mapping_ctx *tunnel_enc_opts_mapping;
+
+ struct mlx5e_post_act *post_act;
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct mlx5e_tc_psample *tc_psample;
+
+ /* support eswitch vports bonding */
+ struct mlx5e_rep_bond *bond;
+
+ /* tc tunneling encapsulation private data */
+ struct mlx5e_tc_tun_encap *encap;
+
+ /* OVS internal port support */
+ struct mlx5e_tc_int_port_priv *int_port_priv;
+
+ struct mlx5e_flow_meters *flow_meters;
+};
+
+struct mlx5e_rep_priv {
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5e_neigh_update_table neigh_update;
+ struct net_device *netdev;
+ struct mlx5_flow_table *root_ft;
+ struct mlx5_flow_handle *vport_rx_rule;
+ struct list_head vport_sqs_list;
+ struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
+ struct rtnl_link_stats64 prev_vf_vport_stats;
+ struct mlx5_flow_handle *send_to_vport_meta_rule;
+ struct rhashtable tc_ht;
+};
+
+static inline
+struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
+{
+ return rep->rep_data[REP_ETH].priv;
+}
+
+struct mlx5e_neigh {
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip;
+ int family;
+};
+
+struct mlx5e_neigh_hash_entry {
+ struct rhash_head rhash_node;
+ struct mlx5e_neigh m_neigh;
+ struct mlx5e_priv *priv;
+ struct net_device *neigh_dev;
+
+ /* Save the neigh hash entry in a list on the representor in
+ * addition to the hash table so that the neighbour entries can be
+ * iterated over easily. Used for stats query.
+ */
+ struct list_head neigh_list;
+
+ /* protects encap list */
+ spinlock_t encap_list_lock;
+ /* encap list sharing the same neigh */
+ struct list_head encap_list;
+
+ /* neigh hash entry can be deleted only when the refcount is zero.
+ * refcount is needed to avoid neigh hash entry removal by TC, while
+ * it's used by the neigh notification call.
+ */
+ refcount_t refcnt;
+
+ /* Save the last time offloaded traffic was reported to pass over one
+ * of the neigh hash entry flows. Use it to periodically update the
+ * neigh 'used' value and avoid the neigh being deleted by the kernel.
+ */
+ unsigned long reported_lastuse;
+
+ struct rcu_head rcu;
+};
+
+enum {
+ /* set when the encap entry is successfully offloaded into HW */
+ MLX5_ENCAP_ENTRY_VALID = BIT(0),
+ MLX5_REFORMAT_DECAP = BIT(1),
+ MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2),
+};
+
+struct mlx5e_decap_key {
+ struct ethhdr key;
+};
+
+struct mlx5e_decap_entry {
+ struct mlx5e_decap_key key;
+ struct list_head flows;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_mpls_info {
+ u32 label;
+ u8 tc;
+ u8 bos;
+ u8 ttl;
+};
+
+struct mlx5e_encap_entry {
+ /* attached neigh hash entry */
+ struct mlx5e_neigh_hash_entry *nhe;
+ /* neigh hash entry list of encaps sharing the same neigh */
+ struct list_head encap_list;
+ /* a node of the eswitch encap hash table which keeps all the encap
+ * entries
+ */
+ struct hlist_node encap_hlist;
+ struct list_head flows;
+ struct list_head route_list;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+
+ struct net_device *out_dev;
+ int route_dev_ifindex;
+ struct mlx5e_tc_tunnel *tunnel;
+ int reformat_type;
+ u8 flags;
+ char *encap_header;
+ int encap_size;
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_rep_sq {
+ struct mlx5_flow_handle *send_to_vport_rule;
+ struct mlx5_flow_handle *send_to_vport_rule_peer;
+ u32 sqn;
+ struct list_head list;
+};
+
+int mlx5e_rep_init(void);
+void mlx5e_rep_cleanup(void);
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv);
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev);
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev);
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup);
+
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id);
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp);
+
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv);
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+bool mlx5e_eswitch_vf_rep(const struct net_device *netdev);
+bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev);
+static inline bool mlx5e_eswitch_rep(const struct net_device *netdev)
+{
+ return mlx5e_eswitch_vf_rep(netdev) ||
+ mlx5e_eswitch_uplink_rep(netdev);
+}
+
+#else /* CONFIG_MLX5_ESWITCH */
+static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
+static inline void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) {}
+static inline int mlx5e_rep_init(void) { return 0; };
+static inline void mlx5e_rep_cleanup(void) {};
+static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev,
+ int attr_id) { return false; }
+static inline int mlx5e_rep_get_offload_stats(int attr_id,
+ const struct net_device *dev,
+ void *sp) { return -EOPNOTSUPP; }
+#endif
+
+static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
+{
+ return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv);
+}
+#endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
new file mode 100644
index 000000000..56d1bd22c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -0,0 +1,2494 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/bitmap.h>
+#include <linux/filter.h>
+#include <net/ip6_checksum.h>
+#include <net/page_pool.h>
+#include <net/inet_ecn.h>
+#include <net/gro.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/xdp_sock_drv.h>
+#include "en.h"
+#include "en/txrx.h"
+#include "en_tc.h"
+#include "eswitch.h"
+#include "en_rep.h"
+#include "en/rep/tc.h"
+#include "ipoib/ipoib.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/macsec.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/ktls_txrx.h"
+#include "en/xdp.h"
+#include "en/xsk/rx.h"
+#include "en/health.h"
+#include "en/params.h"
+#include "devlink.h"
+#include "en/devlink.h"
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+static struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
+ .handle_rx_cqe = mlx5e_handle_rx_cqe,
+ .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
+};
+
+static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
+{
+ return config->rx_filter == HWTSTAMP_FILTER_ALL;
+}
+
+static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
+ u32 cqcc, void *data)
+{
+ u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
+
+ memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
+}
+
+static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
+ struct mlx5_cqwq *wq,
+ u32 cqcc)
+{
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ struct mlx5_cqe64 *title = &cqd->title;
+
+ mlx5e_read_cqe_slot(wq, cqcc, title);
+ cqd->left = be32_to_cpu(title->byte_cnt);
+ cqd->wqe_counter = be16_to_cpu(title->wqe_counter);
+ rq->stats->cqe_compress_blks++;
+}
+
+static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq,
+ struct mlx5e_cq_decomp *cqd,
+ u32 cqcc)
+{
+ mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr);
+ cqd->mini_arr_idx = 0;
+}
+
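+/* Update the ownership bits of the @n CQEs consumed during CQE
+ * decompression, taking a possible wrap of the CQ ring into account.
+ */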
+static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n)
+{
+ u32 cqcc = wq->cc;
+ u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
+ u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
+ u32 wq_sz = mlx5_cqwq_get_size(wq);
+ u32 ci_top = min_t(u32, wq_sz, ci + n);
+
+ for (; ci < ci_top; ci++, n--) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+
+ cqe->op_own = op_own;
+ }
+
+ if (unlikely(ci == wq_sz)) {
+ op_own = !op_own;
+ for (ci = 0; ci < n; ci++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+
+ cqe->op_own = op_own;
+ }
+ }
+}
+
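+/* Expand the current mini CQE into the saved title CQE: copy the byte count
+ * and checksum, refresh the ownership bit, and derive the WQE counter either
+ * from the HW stride index (when supported) or from the software counter.
+ */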
+static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqwq *wq,
+ u32 cqcc)
+{
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx];
+ struct mlx5_cqe64 *title = &cqd->title;
+
+ title->byte_cnt = mini_cqe->byte_cnt;
+ title->check_sum = mini_cqe->checksum;
+ title->op_own &= 0xf0;
+ title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz);
+
+ /* state bit set implies linked-list striding RQ wq type and
+ * HW stride index capability supported
+ */
+ if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) {
+ title->wqe_counter = mini_cqe->stridx;
+ return;
+ }
+
+ /* HW stride index capability not supported */
+ title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
+ else
+ cqd->wqe_counter =
+ mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1);
+}
+
+static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
+ struct mlx5_cqwq *wq,
+ u32 cqcc)
+{
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+
+ mlx5e_decompress_cqe(rq, wq, cqcc);
+ cqd->title.rss_hash_type = 0;
+ cqd->title.rss_hash_result = 0;
+}
+
+static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
+ struct mlx5_cqwq *wq,
+ int update_owner_only,
+ int budget_rem)
+{
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ u32 cqcc = wq->cc + update_owner_only;
+ u32 cqe_count;
+ u32 i;
+
+ cqe_count = min_t(u32, cqd->left, budget_rem);
+
+ for (i = update_owner_only; i < cqe_count;
+ i++, cqd->mini_arr_idx++, cqcc++) {
+ if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
+ mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
+
+ mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+ rq, &cqd->title);
+ }
+ mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
+ wq->cc = cqcc;
+ cqd->left -= cqe_count;
+ rq->stats->cqe_compress_pkts += cqe_count;
+
+ return cqe_count;
+}
+
+static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
+ struct mlx5_cqwq *wq,
+ int budget_rem)
+{
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ u32 cc = wq->cc;
+
+ mlx5e_read_title_slot(rq, wq, cc);
+ mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
+ mlx5e_decompress_cqe(rq, wq, cc);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+ rq, &cqd->title);
+ cqd->mini_arr_idx++;
+
+ return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
+}
+
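+/* Try to stash @page in the RQ page cache ring for later reuse; give up if
+ * the ring is full or the page is not reusable.
+ */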
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
+{
+ struct mlx5e_page_cache *cache = &rq->page_cache;
+ u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ if (tail_next == cache->head) {
+ stats->cache_full++;
+ return false;
+ }
+
+ if (!dev_page_is_reusable(page)) {
+ stats->cache_waive++;
+ return false;
+ }
+
+ cache->page_cache[cache->tail] = page;
+ cache->tail = tail_next;
+ return true;
+}
+
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
+{
+ struct mlx5e_page_cache *cache = &rq->page_cache;
+ struct mlx5e_rq_stats *stats = rq->stats;
+ dma_addr_t addr;
+
+ if (unlikely(cache->head == cache->tail)) {
+ stats->cache_empty++;
+ return false;
+ }
+
+ if (page_ref_count(cache->page_cache[cache->head]) != 1) {
+ stats->cache_busy++;
+ return false;
+ }
+
+ au->page = cache->page_cache[cache->head];
+ cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+ stats->cache_reuse++;
+
+ addr = page_pool_get_dma_addr(au->page);
+ /* Non-XSK always uses PAGE_SIZE. */
+ dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
+ return true;
+}
+
+static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
+{
+ dma_addr_t addr;
+
+ if (mlx5e_rx_cache_get(rq, au))
+ return 0;
+
+ au->page = page_pool_dev_alloc_pages(rq->page_pool);
+ if (unlikely(!au->page))
+ return -ENOMEM;
+
+ /* Non-XSK always uses PAGE_SIZE. */
+ addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
+ if (unlikely(dma_mapping_error(rq->pdev, addr))) {
+ page_pool_recycle_direct(rq->page_pool, au->page);
+ au->page = NULL;
+ return -ENOMEM;
+ }
+ page_pool_set_dma_addr(au->page, addr);
+
+ return 0;
+}
+
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
+{
+ dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
+ dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ page_pool_set_dma_addr(page, 0);
+}
+
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
+{
+ if (likely(recycle)) {
+ if (mlx5e_rx_cache_put(rq, page))
+ return;
+
+ mlx5e_page_dma_unmap(rq, page);
+ page_pool_recycle_direct(rq->page_pool, page);
+ } else {
+ mlx5e_page_dma_unmap(rq, page);
+ page_pool_release_page(rq->page_pool, page);
+ put_page(page);
+ }
+}
+
+static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *frag)
+{
+ int err = 0;
+
+ if (!frag->offset)
+ /* On first frag (offset == 0), replenish page (alloc_unit actually).
+ * Other frags that point to the same alloc_unit (with a different
+ * offset) should just use the new one without replenishing again
+ * by themselves.
+ */
+ err = mlx5e_page_alloc_pool(rq, frag->au);
+
+ return err;
+}
+
+static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *frag,
+ bool recycle)
+{
+ if (frag->last_in_page)
+ mlx5e_page_release_dynamic(rq, frag->au->page, recycle);
+}
+
+static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
+{
+ return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags];
+}
+
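+/* Fill one cyclic RX WQE: make sure every fragment has a backing page (frags
+ * may share one) and program its DMA address, adding the headroom only for
+ * the first fragment.
+ */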
+static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
+ u16 ix)
+{
+ struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix);
+ int err;
+ int i;
+
+ for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
+ dma_addr_t addr;
+ u16 headroom;
+
+ err = mlx5e_get_rx_frag(rq, frag);
+ if (unlikely(err))
+ goto free_frags;
+
+ headroom = i == 0 ? rq->buff.headroom : 0;
+ addr = page_pool_get_dma_addr(frag->au->page);
+ wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom);
+ }
+
+ return 0;
+
+free_frags:
+ while (--i >= 0)
+ mlx5e_put_rx_frag(rq, --frag, true);
+
+ return err;
+}
+
+static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi,
+ bool recycle)
+{
+ int i;
+
+ if (rq->xsk_pool) {
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ xsk_buff_free(wi->au->xsk);
+ return;
+ }
+
+ for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
+ mlx5e_put_rx_frag(rq, wi, recycle);
+}
+
+static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
+
+ mlx5e_free_rx_wqe(rq, wi, false);
+}
+
+static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_rx_wqe_cyc *wqe;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+
+ if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j)))
+ break;
+ }
+
+ return i;
+}
+
+static inline void
+mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
+ union mlx5e_alloc_unit *au, u32 frag_offset, u32 len,
+ unsigned int truesize)
+{
+ dma_addr_t addr = page_pool_get_dma_addr(au->page);
+
+ dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
+ rq->buff.map_dir);
+ page_ref_inc(au->page);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ au->page, frag_offset, len, truesize);
+}
+
+static inline void
+mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct page *page, dma_addr_t addr,
+ int offset_from, int dma_offset, u32 headlen)
+{
+ const void *from = page_address(page) + offset_from;
+ /* Aligning len to sizeof(long) optimizes memcpy performance */
+ unsigned int len = ALIGN(headlen, sizeof(long));
+
+ dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len,
+ rq->buff.map_dir);
+ skb_copy_to_linear_data(skb, from, len);
+}
+
+static void
+mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
+{
+ union mlx5e_alloc_unit *alloc_units = wi->alloc_units;
+ bool no_xdp_xmit;
+ int i;
+
+ /* A common case for AF_XDP. */
+ if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe))
+ return;
+
+ no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+
+ if (rq->xsk_pool) {
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ xsk_buff_free(alloc_units[i].xsk);
+ } else {
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle);
+ }
+}
+
+static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
+{
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+
+ do {
+ u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head);
+
+ mlx5_wq_ll_push(wq, next_wqe_index);
+ } while (--n);
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ dma_wmb();
+
+ mlx5_wq_ll_update_db_record(wq);
+}
+
+/* This function returns the size of the contiguous run of free (clear) bits
+ * in the bitmap, starting at @first, capped at @len, and taking wrap-around
+ * of the bitmap into account.
+ */
+static int bitmap_find_window(unsigned long *bitmap, int len,
+ int bitmap_size, int first)
+{
+ int next_one, count;
+
+ next_one = find_next_bit(bitmap, bitmap_size, first);
+ if (next_one == bitmap_size) {
+ if (bitmap_size - first >= len)
+ return len;
+ next_one = find_next_bit(bitmap, bitmap_size, 0);
+ count = next_one + bitmap_size - first;
+ } else {
+ count = next_one - first;
+ }
+
+ return min(len, count);
+}
+
+static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
+ __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
+{
+ memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_UMR);
+ umr_wqe->ctrl.umr_mkey = key;
+ umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
+ | MLX5E_KLM_UMR_DS_CNT(klm_len));
+ umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+ umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
+ umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+}
+
+static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
+ struct mlx5e_icosq *sq,
+ u16 klm_entries, u16 index)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
+ u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
+ struct page *page = shampo->last_page;
+ u64 addr = shampo->last_addr;
+ struct mlx5e_dma_info *dma_info;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int headroom, i;
+
+ headroom = rq->buff.headroom;
+ new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+ entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT);
+ wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
+ pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+ build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
+
+ for (i = 0; i < entries; i++, index++) {
+ dma_info = &shampo->info[index];
+ if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
+ MLX5_UMR_KLM_ALIGNMENT))
+ goto update_klm;
+ header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
+ MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
+ if (!(header_offset & (PAGE_SIZE - 1))) {
+ union mlx5e_alloc_unit au;
+
+ err = mlx5e_page_alloc_pool(rq, &au);
+ if (unlikely(err))
+ goto err_unmap;
+ page = dma_info->page = au.page;
+ addr = dma_info->addr = page_pool_get_dma_addr(au.page);
+ } else {
+ dma_info->addr = addr + header_offset;
+ dma_info->page = page;
+ }
+
+update_klm:
+ umr_wqe->inline_klms[i].bcount =
+ cpu_to_be32(MLX5E_RX_MAX_HEAD);
+ umr_wqe->inline_klms[i].key = cpu_to_be32(lkey);
+ umr_wqe->inline_klms[i].va =
+ cpu_to_be64(dma_info->addr + headroom);
+ }
+
+ sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
+ .num_wqebbs = wqe_bbs,
+ .shampo.len = new_entries,
+ };
+
+ shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
+ shampo->last_page = page;
+ shampo->last_addr = addr;
+ sq->pc += wqe_bbs;
+ sq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_unmap:
+ while (--i >= 0) {
+ dma_info = &shampo->info[--index];
+ if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
+ dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
+ mlx5e_page_release_dynamic(rq, dma_info->page, true);
+ }
+ }
+ rq->stats->buff_alloc_err++;
+ return err;
+}
+
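+/* Replenish SHAMPO header entries for one MPWQE: find the contiguous free
+ * window in the header bitmap starting at the producer index, then issue one
+ * or more header UMR WQEs, splitting at the per-WQE KLM limit and at the
+ * wrap-around of the header buffer.
+ */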
+static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u16 klm_entries, num_wqe, index, entries_before;
+ struct mlx5e_icosq *sq = rq->icosq;
+ int i, err, max_klm_entries, len;
+
+ max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
+ klm_entries = bitmap_find_window(shampo->bitmap,
+ shampo->hd_per_wqe,
+ shampo->hd_per_wq, shampo->pi);
+ if (!klm_entries)
+ return 0;
+
+ klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+ index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT);
+ entries_before = shampo->hd_per_wq - index;
+
+ if (unlikely(entries_before < klm_entries))
+ num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
+ DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
+ else
+ num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
+
+ for (i = 0; i < num_wqe; i++) {
+ len = (klm_entries > max_klm_entries) ? max_klm_entries :
+ klm_entries;
+ if (unlikely(index + len > shampo->hd_per_wq))
+ len = shampo->hd_per_wq - index;
+ err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
+ if (unlikely(err))
+ return err;
+ index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
+ klm_entries -= len;
+ }
+
+ return 0;
+}
+
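+/* Allocate the pages backing one multi-packet WQE, fill the inline MTTs of
+ * the pre-initialized UMR WQE and queue the UMR on the ICOSQ. For SHAMPO RQs
+ * the header entries are replenished first.
+ */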
+static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ union mlx5e_alloc_unit *au = &wi->alloc_units[0];
+ struct mlx5e_icosq *sq = rq->icosq;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
+ int err;
+ int i;
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ err = mlx5e_alloc_rx_hd_mpwqe(rq);
+ if (unlikely(err))
+ goto err;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
+
+ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
+ dma_addr_t addr;
+
+ err = mlx5e_page_alloc_pool(rq, au);
+ if (unlikely(err))
+ goto err_unmap;
+ addr = page_pool_get_dma_addr(au->page);
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ wi->consumed_strides = 0;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_UMR);
+
+ offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
+
+ sq->pc += rq->mpwqe.umr_wqebbs;
+
+ sq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_unmap:
+ while (--i >= 0) {
+ au--;
+ mlx5e_page_release_dynamic(rq, au->page, true);
+ }
+
+err:
+ rq->stats->buff_alloc_err++;
+
+ return err;
+}
+
+/* Deallocate SHAMPO header buffer entries in the range [start, start + len),
+ * wrapping around the end of the buffer if needed.
+ * close == true means the RQ is being closed: only entries still marked as in
+ * use in the bitmap are released. Otherwise every entry in the range is
+ * released. In both cases the corresponding bitmap bits are cleared.
+ */
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ int hd_per_wq = shampo->hd_per_wq;
+ struct page *deleted_page = NULL;
+ struct mlx5e_dma_info *hd_info;
+ int i, index = start;
+
+ for (i = 0; i < len; i++, index++) {
+ if (index == hd_per_wq)
+ index = 0;
+
+ if (close && !test_bit(index, shampo->bitmap))
+ continue;
+
+ hd_info = &shampo->info[index];
+ hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
+ if (hd_info->page != deleted_page) {
+ deleted_page = hd_info->page;
+ mlx5e_page_release_dynamic(rq, hd_info->page, false);
+ }
+ }
+
+ if (start + len > hd_per_wq) {
+ len -= hd_per_wq - start;
+ bitmap_clear(shampo->bitmap, start, hd_per_wq - start);
+ start = 0;
+ }
+
+ bitmap_clear(shampo->bitmap, start, len);
+}
+
+static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ /* Don't recycle, this function is called on rq/netdev close */
+ mlx5e_free_rx_mpwqe(rq, wi, false);
+}
+
+INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int wqe_bulk, count;
+ bool busy = false;
+ u16 head;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return false;
+
+ if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk)
+ return false;
+
+ if (rq->page_pool)
+ page_pool_nid_changed(rq->page_pool, numa_mem_id());
+
+ wqe_bulk = mlx5_wq_cyc_missing(wq);
+ head = mlx5_wq_cyc_get_head(wq);
+
+ /* Don't allow newly allocated WQEs to share a page with old WQEs that
+ * aren't completed yet; stop the bulk early instead.
+ */
+ wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;
+
+ if (!rq->xsk_pool)
+ count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
+ else if (likely(!rq->xsk_pool->dma_need_sync))
+ count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
+ else
+ /* If dma_need_sync is true, it's more efficient to call
+ * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch,
+ * because the latter does the same check and returns only one
+ * frame.
+ */
+ count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk);
+
+ mlx5_wq_cyc_push_n(wq, count);
+ if (unlikely(count != wqe_bulk)) {
+ rq->stats->buff_alloc_err++;
+ busy = true;
+ }
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ dma_wmb();
+
+ mlx5_wq_cyc_update_db_record(wq);
+
+ return busy;
+}
+
+void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
+{
+ u16 sqcc;
+
+ sqcc = sq->cc;
+
+ while (sqcc != sq->pc) {
+ struct mlx5e_icosq_wqe_info *wi;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+ sqcc += wi->num_wqebbs;
+#ifdef CONFIG_MLX5_EN_TLS
+ switch (wi->wqe_type) {
+ case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
+ mlx5e_ktls_handle_ctx_completion(wi);
+ break;
+ case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
+ mlx5e_ktls_handle_get_psv_completion(wi, sq);
+ break;
+ }
+#endif
+ }
+ sq->cc = sqcc;
+}
+
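+/* Completion of a SHAMPO header UMR: mark the newly mapped header entries as
+ * in use in the bitmap (handling wrap-around) and advance shampo->ci.
+ */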
+static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
+ struct mlx5e_icosq *sq)
+{
+ struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
+ struct mlx5e_shampo_hd *shampo;
+ /* assume 1:1 relationship between RQ and icosq */
+ struct mlx5e_rq *rq = &c->rq;
+ int end, from, len = umr.len;
+
+ shampo = rq->mpwqe.shampo;
+ end = shampo->hd_per_wq;
+ from = shampo->ci;
+ if (from + len > shampo->hd_per_wq) {
+ len -= end - from;
+ bitmap_set(shampo->bitmap, from, end - from);
+ from = 0;
+ }
+
+ bitmap_set(shampo->bitmap, from, len);
+ shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
+}
+
+int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
+{
+ struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
+ struct mlx5_cqe64 *cqe;
+ u16 sqcc;
+ int i;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return 0;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (likely(!cqe))
+ return 0;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ i = 0;
+ do {
+ u16 wqe_counter;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ do {
+ struct mlx5e_icosq_wqe_info *wi;
+ u16 ci;
+
+ last_wqe = (sqcc == wqe_counter);
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+ sqcc += wi->num_wqebbs;
+
+ if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(cq->netdev,
+ "Bad OP in ICOSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+ (struct mlx5_err_cqe *)cqe);
+ mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+ if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+ queue_work(cq->priv->wq, &sq->recover_work);
+ break;
+ }
+
+ switch (wi->wqe_type) {
+ case MLX5E_ICOSQ_WQE_UMR_RX:
+ wi->umr.rq->mpwqe.umr_completed++;
+ break;
+ case MLX5E_ICOSQ_WQE_NOP:
+ break;
+ case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR:
+ mlx5e_handle_shampo_hd_umr(wi->shampo, sq);
+ break;
+#ifdef CONFIG_MLX5_EN_TLS
+ case MLX5E_ICOSQ_WQE_UMR_TLS:
+ break;
+ case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
+ mlx5e_ktls_handle_ctx_completion(wi);
+ break;
+ case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
+ mlx5e_ktls_handle_get_psv_completion(wi, sq);
+ break;
+#endif
+ default:
+ netdev_WARN_ONCE(cq->netdev,
+ "Bad WQE type in ICOSQ WQE info: 0x%x\n",
+ wi->wqe_type);
+ }
+ } while (!last_wqe);
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ sq->cc = sqcc;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ return i;
+}
+
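+/* Refill the striding RQ: hand WQEs whose UMRs have completed back to HW and,
+ * once enough WQEs are missing, allocate a bulk of new MPWQEs (regular or
+ * XSK), ringing the ICOSQ doorbell once for the whole bulk.
+ */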
+INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+ u8 umr_completed = rq->mpwqe.umr_completed;
+ struct mlx5e_icosq *sq = rq->icosq;
+ int alloc_err = 0;
+ u8 missing, i;
+ u16 head;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return false;
+
+ if (umr_completed) {
+ mlx5e_post_rx_mpwqe(rq, umr_completed);
+ rq->mpwqe.umr_in_progress -= umr_completed;
+ rq->mpwqe.umr_completed = 0;
+ }
+
+ missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;
+
+ if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
+ rq->stats->congst_umr++;
+
+ if (likely(missing < rq->mpwqe.min_wqe_bulk))
+ return false;
+
+ if (rq->page_pool)
+ page_pool_nid_changed(rq->page_pool, numa_mem_id());
+
+ head = rq->mpwqe.actual_wq_head;
+ i = missing;
+ do {
+ alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
+ mlx5e_alloc_rx_mpwqe(rq, head);
+
+ if (unlikely(alloc_err))
+ break;
+ head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+ } while (--i);
+
+ rq->mpwqe.umr_last_bulk = missing - i;
+ if (sq->doorbell_cseg) {
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ sq->doorbell_cseg = NULL;
+ }
+
+ rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
+ rq->mpwqe.actual_wq_head = head;
+
+ /* If XSK Fill Ring doesn't have enough frames, report the error, so
+ * that one of the actions can be performed:
+ * 1. If need_wakeup is used, signal that the application has to kick
+ * the driver when it refills the Fill Ring.
+ * 2. Otherwise, busy poll by rescheduling the NAPI poll.
+ */
+ if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool))
+ return true;
+
+ return false;
+}
+
+static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
+{
+ u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+ (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
+
+ tcp->check = 0;
+ tcp->psh = get_cqe_lro_tcppsh(cqe);
+
+ if (tcp_ack) {
+ tcp->ack = 1;
+ tcp->ack_seq = cqe->lro.ack_seq_num;
+ tcp->window = cqe->lro.tcp_win;
+ }
+}
+
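+/* An LRO session aggregates several TCP segments in HW; rebuild the headers
+ * of the merged SKB: refresh the IPv4/IPv6 length and TTL/hop-limit fields,
+ * copy the TCP flags reported in the CQE, and recompute the TCP checksum
+ * including the pseudo header.
+ */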
+static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt)
+{
+ struct ethhdr *eth = (struct ethhdr *)(skb->data);
+ struct tcphdr *tcp;
+ int network_depth = 0;
+ __wsum check;
+ __be16 proto;
+ u16 tot_len;
+ void *ip_p;
+
+ proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
+
+ tot_len = cqe_bcnt - network_depth;
+ ip_p = skb->data + network_depth;
+
+ if (proto == htons(ETH_P_IP)) {
+ struct iphdr *ipv4 = ip_p;
+
+ tcp = ip_p + sizeof(struct iphdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ ipv4->ttl = cqe->lro.min_ttl;
+ ipv4->tot_len = cpu_to_be16(tot_len);
+ ipv4->check = 0;
+ ipv4->check = ip_fast_csum((unsigned char *)ipv4,
+ ipv4->ihl);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
+ tot_len - sizeof(struct iphdr),
+ IPPROTO_TCP, check);
+ } else {
+ u16 payload_len = tot_len - sizeof(struct ipv6hdr);
+ struct ipv6hdr *ipv6 = ip_p;
+
+ tcp = ip_p + sizeof(struct ipv6hdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
+ ipv6->hop_limit = cqe->lro.min_ttl;
+ ipv6->payload_len = cpu_to_be16(payload_len);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
+ IPPROTO_TCP, check);
+ }
+}
+
+static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
+{
+ struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
+ u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
+
+ return page_address(last_head->page) + head_offset;
+}
+
+static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
+{
+ int udp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct udphdr *uh;
+
+ uh = (struct udphdr *)(skb->data + udp_off);
+ uh->len = htons(skb->len - udp_off);
+
+ if (uh->check)
+ uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr,
+ ipv4->daddr, 0);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6)
+{
+ int udp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct udphdr *uh;
+
+ uh = (struct udphdr *)(skb->data + udp_off);
+ uh->len = htons(skb->len - udp_off);
+
+ if (uh->check)
+ uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr,
+ &ipv6->daddr, 0);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct tcphdr *skb_tcp_hd)
+{
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ struct tcphdr *last_tcp_hd;
+ void *last_hd_addr;
+
+ last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+ last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff;
+ tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+}
+
+static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4,
+ struct mlx5_cqe64 *cqe, bool match)
+{
+ int tcp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct tcphdr *tcp;
+
+ tcp = (struct tcphdr *)(skb->data + tcp_off);
+ if (match)
+ mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+ tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
+ ipv4->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+
+ skb->csum_start = (unsigned char *)tcp - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (tcp->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
+static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6,
+ struct mlx5_cqe64 *cqe, bool match)
+{
+ int tcp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct tcphdr *tcp;
+
+ tcp = (struct tcphdr *)(skb->data + tcp_off);
+ if (match)
+ mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+ tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr,
+ &ipv6->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ skb->csum_start = (unsigned char *)tcp - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (tcp->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
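+/* Finalize the headers of an aggregated HW-GRO SKB that holds more than one
+ * segment: fix the IP total/payload length, set the GSO type and segment
+ * count, and set up CHECKSUM_PARTIAL offsets for the TCP or UDP header.
+ */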
+static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+ bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP));
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (is_ipv4) {
+ int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr);
+ struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff);
+ __be16 newlen = htons(skb->len - nhoff);
+
+ csum_replace2(&ipv4->check, ipv4->tot_len, newlen);
+ ipv4->tot_len = newlen;
+
+ if (ipv4->protocol == IPPROTO_TCP)
+ mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match);
+ else
+ mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4);
+ } else {
+ int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr);
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff);
+
+ ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6));
+
+ if (ipv6->nexthdr == IPPROTO_TCP)
+ mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match);
+ else
+ mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6);
+ }
+}
+
+static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb)
+{
+ u8 cht = cqe->rss_hash_type;
+ int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
+ (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
+ PKT_HASH_TYPE_NONE;
+ skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
+}
+
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+ __be16 *proto)
+{
+ *proto = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = __vlan_get_protocol(skb, *proto, network_depth);
+
+ if (*proto == htons(ETH_P_IP))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+ if (*proto == htons(ETH_P_IPV6))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+ return false;
+}
+
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+ int network_depth = 0;
+ __be16 proto;
+ void *ip;
+ int rc;
+
+ if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+ return;
+
+ ip = skb->data + network_depth;
+ rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+ IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
+
+ rq->stats->ecn_mark += !!rc;
+}
+
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+{
+ void *ip_p = skb->data + network_depth;
+
+ return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+ ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
+#define MAX_PADDING 8
+
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+ struct mlx5e_rq_stats *stats)
+{
+ stats->csum_complete_tail_slow++;
+ skb->csum = csum_block_add(skb->csum,
+ skb_checksum(skb, offset, len, 0),
+ offset);
+}
+
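+/* CHECKSUM_COMPLETE must also cover tail padding that the CQE checksum
+ * skipped: add it to skb->csum, reading up to MAX_PADDING bytes through
+ * skb_header_pointer() on the fast path and falling back to skb_checksum()
+ * for longer or non-linear tails.
+ */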
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+ struct mlx5e_rq_stats *stats)
+{
+ u8 tail_padding[MAX_PADDING];
+ int len = skb->len - offset;
+ void *tail;
+
+ if (unlikely(len > MAX_PADDING)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ tail = skb_header_pointer(skb, offset, len, tail_padding);
+ if (unlikely(!tail)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ stats->csum_complete_tail++;
+ skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
+static void
+mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto,
+ struct mlx5e_rq_stats *stats)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip4;
+ int pkt_len;
+
+ /* Fixup vlan headers, if any */
+ if (network_depth > ETH_HLEN)
+ /* The CQE csum is calculated from the IP header and does not
+ * cover VLAN headers (if present), so add their checksum
+ * manually.
+ */
+ skb->csum = csum_partial(skb->data + ETH_HLEN,
+ network_depth - ETH_HLEN,
+ skb->csum);
+
+ /* Fixup tail padding, if any */
+ switch (proto) {
+ case htons(ETH_P_IP):
+ ip4 = (struct iphdr *)(skb->data + network_depth);
+ pkt_len = network_depth + ntohs(ip4->tot_len);
+ break;
+ case htons(ETH_P_IPV6):
+ ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+ pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+ break;
+ default:
+ return;
+ }
+
+ if (likely(pkt_len >= skb->len))
+ return;
+
+ tail_padding_csum(skb, pkt_len, stats);
+}
+
+static inline void mlx5e_handle_csum(struct net_device *netdev,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_rq *rq,
+ struct sk_buff *skb,
+ bool lro)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ int network_depth = 0;
+ __be16 proto;
+
+ if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
+ goto csum_none;
+
+ if (lro) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ stats->csum_unnecessary++;
+ return;
+ }
+
+ /* True when explicitly set via priv flag, or XDP prog is loaded */
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
+ get_cqe_tls_offload(cqe))
+ goto csum_unnecessary;
+
+ /* CQE csum doesn't cover padding octets in short ethernet
+ * frames. And the pad field is appended prior to calculating
+ * and appending the FCS field.
+ *
+ * Detecting these padded frames requires to verify and parse
+ * IP headers, so we simply force all those small frames to be
+ * CHECKSUM_UNNECESSARY even if they are not padded.
+ */
+ if (short_frame(skb->len))
+ goto csum_unnecessary;
+
+ if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+ if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+ goto csum_unnecessary;
+
+ stats->csum_complete++;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+
+ if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
+ return; /* CQE csum covers all received bytes */
+
+ /* csum might need some fixups ...*/
+ mlx5e_skb_csum_fixup(skb, network_depth, proto, stats);
+ return;
+ }
+
+csum_unnecessary:
+ if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
+ (cqe->hds_ip_ext & CQE_L4_OK))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (cqe_is_tunneled(cqe)) {
+ skb->csum_level = 1;
+ skb->encapsulation = 1;
+ stats->csum_unnecessary_inner++;
+ return;
+ }
+ stats->csum_unnecessary++;
+ return;
+ }
+csum_none:
+ skb->ip_summed = CHECKSUM_NONE;
+ stats->csum_none++;
+}
+
+#define MLX5E_CE_BIT_MASK 0x80
+
+static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct mlx5e_rq *rq,
+ struct sk_buff *skb)
+{
+ u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct net_device *netdev = rq->netdev;
+
+ skb->mac_len = ETH_HLEN;
+
+ if (unlikely(get_cqe_tls_offload(cqe)))
+ mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
+
+ if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
+ mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);
+
+ if (unlikely(mlx5e_macsec_is_rx_flow(cqe)))
+ mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);
+
+ if (lro_num_seg > 1) {
+ mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
+ skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
+ /* Subtract one since we already counted this as one
+ * "regular" packet in mlx5e_complete_rx_cqe()
+ */
+ stats->packets += lro_num_seg - 1;
+ stats->lro_packets++;
+ stats->lro_bytes += cqe_bcnt;
+ }
+
+ if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+ skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
+ rq->clock, get_cqe_ts(cqe));
+ skb_record_rx_queue(skb, rq->ix);
+
+ if (likely(netdev->features & NETIF_F_RXHASH))
+ mlx5e_skb_set_hash(cqe, skb);
+
+ if (cqe_has_vlan(cqe)) {
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ be16_to_cpu(cqe->vlan_info));
+ stats->removed_vlan_packets++;
+ }
+
+ skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
+
+ mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+ /* checking CE bit in cqe - MSB in ml_path field */
+ if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+ mlx5e_enable_ecn(rq, skb);
+
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ if (unlikely(mlx5e_skb_is_multicast(skb)))
+ stats->mcast_packets++;
+}
+
+static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->packets++;
+ stats->gro_packets++;
+ stats->bytes += cqe_bcnt;
+ stats->gro_bytes += cqe_bcnt;
+ if (NAPI_GRO_CB(skb)->count != 1)
+ return;
+ mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
+ skb_reset_network_header(skb);
+ if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) {
+ napi_gro_receive(rq->cq.napi, skb);
+ rq->hw_gro_data->skb = NULL;
+ }
+}
+
+static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->packets++;
+ stats->bytes += cqe_bcnt;
+ mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
+}
+
+static inline
+struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
+ u32 frag_size, u16 headroom,
+ u32 cqe_bcnt, u32 metasize)
+{
+ struct sk_buff *skb = build_skb(va, frag_size);
+
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_reserve(skb, headroom);
+ skb_put(skb, cqe_bcnt);
+
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ return skb;
+}
+
+static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
+ u32 len, struct xdp_buff *xdp)
+{
+ xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
+ xdp_prepare_buff(xdp, va, headroom, len, true);
+}
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
+{
+ union mlx5e_alloc_unit *au = wi->au;
+ u16 rx_headroom = rq->buff.headroom;
+ struct bpf_prog *prog;
+ struct sk_buff *skb;
+ u32 metasize = 0;
+ void *va, *data;
+ dma_addr_t addr;
+ u32 frag_size;
+
+ va = page_address(au->page) + wi->offset;
+ data = va + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
+ frag_size, rq->buff.map_dir);
+ net_prefetch(data);
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ if (mlx5e_xdp_handle(rq, au->page, prog, &xdp))
+ return NULL; /* page/packet was consumed by XDP */
+
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ cqe_bcnt = xdp.data_end - xdp.data;
+ }
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(au->page);
+
+ return skb;
+}
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
+{
+ struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+ struct mlx5e_wqe_frag_info *head_wi = wi;
+ union mlx5e_alloc_unit *au = wi->au;
+ u16 rx_headroom = rq->buff.headroom;
+ struct skb_shared_info *sinfo;
+ u32 frag_consumed_bytes;
+ struct bpf_prog *prog;
+ struct xdp_buff xdp;
+ struct sk_buff *skb;
+ dma_addr_t addr;
+ u32 truesize;
+ void *va;
+
+ va = page_address(au->page) + wi->offset;
+ frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
+ rq->buff.frame0_sz, rq->buff.map_dir);
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetch(va + rx_headroom);
+
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp);
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
+ truesize = 0;
+
+ cqe_bcnt -= frag_consumed_bytes;
+ frag_info++;
+ wi++;
+
+ while (cqe_bcnt) {
+ skb_frag_t *frag;
+
+ au = wi->au;
+
+ frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
+ frag_consumed_bytes, rq->buff.map_dir);
+
+ if (!xdp_buff_has_frags(&xdp)) {
+ /* Init on the first fragment to avoid cold cache access
+ * when possible.
+ */
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(&xdp);
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, au->page);
+ skb_frag_off_set(frag, wi->offset);
+ skb_frag_size_set(frag, frag_consumed_bytes);
+
+ if (page_is_pfmemalloc(au->page))
+ xdp_buff_set_frag_pfmemalloc(&xdp);
+
+ sinfo->xdp_frags_size += frag_consumed_bytes;
+ truesize += frag_info->frag_stride;
+
+ cqe_bcnt -= frag_consumed_bytes;
+ frag_info++;
+ wi++;
+ }
+
+ au = head_wi->au;
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
+ if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ int i;
+
+ for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
+ mlx5e_put_rx_frag(rq, &head_wi[i], true);
+ }
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz,
+ xdp.data - xdp.data_hard_start,
+ xdp.data_end - xdp.data,
+ xdp.data - xdp.data_meta);
+ if (unlikely(!skb))
+ return NULL;
+
+ page_ref_inc(au->page);
+
+ if (unlikely(xdp_buff_has_frags(&xdp))) {
+ int i;
+
+ /* sinfo->nr_frags is reset by build_skb, calculate again. */
+ xdp_update_skb_shared_info(skb, wi - head_wi - 1,
+ sinfo->xdp_frags_size, truesize,
+ xdp_buff_is_frag_pfmemalloc(&xdp));
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ page_ref_inc(skb_frag_page(frag));
+ }
+ }
+
+ return skb;
+}
+
+static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
+ struct mlx5e_priv *priv = rq->priv;
+
+ if (cqe_syndrome_needs_recover(err_cqe->syndrome) &&
+ !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) {
+ mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe);
+ queue_work(priv->wq, &rq->recover_work);
+ }
+}
+
+static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ trigger_report(rq, cqe);
+ rq->stats->wqe_err++;
+}
+
+static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ mlx5e_handle_rx_err_cqe(rq, cqe);
+ goto free_wqe;
+ }
+
+ skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe,
+ mlx5e_skb_from_cqe_linear,
+ mlx5e_skb_from_cqe_nonlinear,
+ mlx5e_xsk_skb_from_cqe_linear,
+ rq, wi, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ /* do not return page to cache,
+ * it will be returned on XDP_TX completion.
+ */
+ goto wq_cyc_pop;
+ }
+ goto free_wqe;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (mlx5e_cqe_regb_chain(cqe))
+ if (!mlx5e_tc_update_skb(cqe, skb)) {
+ dev_kfree_skb_any(skb);
+ goto free_wqe;
+ }
+
+ napi_gro_receive(rq->cq.napi, skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct net_device *netdev = rq->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ mlx5e_handle_rx_err_cqe(rq, cqe);
+ goto free_wqe;
+ }
+
+ skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ mlx5e_skb_from_cqe_linear,
+ mlx5e_skb_from_cqe_nonlinear,
+ rq, wi, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ /* do not return page to cache,
+ * it will be returned on XDP_TX completion.
+ */
+ goto wq_cyc_pop;
+ }
+ goto free_wqe;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (rep->vlan && skb_vlan_tag_present(skb))
+ skb_vlan_pop(skb);
+
+ mlx5e_rep_tc_receive(cqe, rq, skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+
+static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
+ u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
+ u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
+ u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
+ u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
+ struct mlx5e_rx_wqe_ll *wqe;
+ struct mlx5_wq_ll *wq;
+ struct sk_buff *skb;
+ u16 cqe_bcnt;
+
+ wi->consumed_strides += cstrides;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ mlx5e_handle_rx_err_cqe(rq, cqe);
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+
+ skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
+ mlx5e_skb_from_cqe_mpwrq_linear,
+ mlx5e_skb_from_cqe_mpwrq_nonlinear,
+ rq, wi, cqe_bcnt, head_offset, page_idx);
+ if (!skb)
+ goto mpwrq_cqe_out;
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ mlx5e_rep_tc_receive(cqe, rq, skb);
+
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
+const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
+ .handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
+};
+#endif
+
+static void
+mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
+ union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset)
+{
+ net_prefetchw(skb->data);
+
+ while (data_bcnt) {
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
+ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
+ unsigned int truesize;
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ truesize = pg_consumed_bytes;
+ else
+ truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+ mlx5e_add_skb_frag(rq, skb, au, data_offset,
+ pg_consumed_bytes, truesize);
+
+ data_bcnt -= pg_consumed_bytes;
+ data_offset = 0;
+ au++;
+ }
+}
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx)
+{
+ union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
+ u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
+ u32 frag_offset = head_offset + headlen;
+ u32 byte_cnt = cqe_bcnt - headlen;
+ union mlx5e_alloc_unit *head_au = au;
+ struct sk_buff *skb;
+ dma_addr_t addr;
+
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ net_prefetchw(skb->data);
+
+ /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
+ if (unlikely(frag_offset >= PAGE_SIZE)) {
+ au++;
+ frag_offset -= PAGE_SIZE;
+ }
+
+ mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
+ /* copy header */
+ addr = page_pool_get_dma_addr(head_au->page);
+ mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
+ head_offset, head_offset, headlen);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += headlen;
+ skb->len += headlen;
+
+ return skb;
+}
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx)
+{
+ union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
+ u16 rx_headroom = rq->buff.headroom;
+ struct bpf_prog *prog;
+ struct sk_buff *skb;
+ u32 metasize = 0;
+ void *va, *data;
+ dma_addr_t addr;
+ u32 frag_size;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ va = page_address(au->page) + head_offset;
+ data = va + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+
+ addr = page_pool_get_dma_addr(au->page);
+ dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
+ frag_size, rq->buff.map_dir);
+ net_prefetch(data);
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ cqe_bcnt = xdp.data_end - xdp.data;
+ }
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(au->page);
+
+ return skb;
+}
+
+static struct sk_buff *
+mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ struct mlx5_cqe64 *cqe, u16 header_index)
+{
+ struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
+ u16 head_offset = head->addr & (PAGE_SIZE - 1);
+ u16 head_size = cqe->shampo.header_size;
+ u16 rx_headroom = rq->buff.headroom;
+ struct sk_buff *skb = NULL;
+ void *hdr, *data;
+ u32 frag_size;
+
+ hdr = page_address(head->page) + head_offset;
+ data = hdr + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
+
+ if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
+ /* build SKB around header */
+ dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
+ prefetchw(hdr);
+ prefetch(data);
+ skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);
+
+ if (unlikely(!skb))
+ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(head->page);
+
+ } else {
+ /* allocate SKB and copy header for large header */
+ rq->stats->gro_large_hds++;
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(head_size, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+ mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
+ head_offset + rx_headroom,
+ rx_headroom, head_size);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += head_size;
+ skb->len += head_size;
+ }
+ return skb;
+}
+
+static void
+mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz)
+{
+ skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
+ unsigned int frag_size = skb_frag_size(last_frag);
+ unsigned int frag_truesize;
+
+ frag_truesize = ALIGN(frag_size, BIT(log_stride_sz));
+ skb->truesize += frag_truesize - frag_size;
+}
+
+static void
+mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->gro_skbs++;
+ if (likely(skb_shinfo(skb)->nr_frags))
+ mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz);
+ if (NAPI_GRO_CB(skb)->count > 1)
+ mlx5e_shampo_update_hdr(rq, cqe, match);
+ napi_gro_receive(rq->cq.napi, skb);
+ rq->hw_gro_data->skb = NULL;
+}
+
+static bool
+mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
+{
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+
+ return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
+}
+
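+/* Free one SHAMPO header entry: the backing page is released back to the pool
+ * only when the last header slot of that page is freed; the bitmap bit is
+ * always cleared.
+ */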
+static void
+mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u64 addr = shampo->info[header_index].addr;
+
+ if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
+ shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
+ mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true);
+ }
+ bitmap_clear(shampo->bitmap, header_index, 1);
+}
+
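+/* SHAMPO (HW GRO) completion path: each CQE points at a header entry plus a
+ * data region inside the MPWQE. A new aggregated SKB is started from the
+ * header (or from the data region when the CQE carries no header), data is
+ * appended as frags, and the SKB is flushed to the GRO engine when the
+ * session ends (flush bit) or the next packet no longer matches.
+ */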
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
+ u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ u16 head_size = cqe->shampo.header_size;
+ struct sk_buff **skb = &rq->hw_gro_data->skb;
+ bool flush = cqe->shampo.flush;
+ bool match = cqe->shampo.match;
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct mlx5e_rx_wqe_ll *wqe;
+ union mlx5e_alloc_unit *au;
+ struct mlx5e_mpw_info *wi;
+ struct mlx5_wq_ll *wq;
+
+ wi = mlx5e_get_mpw_info(rq, wqe_id);
+ wi->consumed_strides += cstrides;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ mlx5e_handle_rx_err_cqe(rq, cqe);
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ stats->gro_match_packets += match;
+
+ if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) {
+ match = false;
+ mlx5e_shampo_flush_skb(rq, cqe, match);
+ }
+
+ if (!*skb) {
+ if (likely(head_size))
+ *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+ else
+ *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
+ page_idx);
+ if (unlikely(!*skb))
+ goto free_hd_entry;
+
+ NAPI_GRO_CB(*skb)->count = 1;
+ skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
+ } else {
+ NAPI_GRO_CB(*skb)->count++;
+ if (NAPI_GRO_CB(*skb)->count == 2 &&
+ rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) {
+ void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+ int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff -
+ sizeof(struct iphdr);
+ struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff);
+
+ rq->hw_gro_data->second_ip_id = ntohs(iph->id);
+ }
+ }
+
+ if (likely(head_size)) {
+ au = &wi->alloc_units[page_idx];
+ mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset);
+ }
+
+ mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
+ if (flush)
+ mlx5e_shampo_flush_skb(rq, cqe, match);
+free_hd_entry:
+ mlx5e_free_rx_shampo_hd_entry(rq, header_index);
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
+static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
+ u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
+ u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
+ u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
+ u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
+ struct mlx5e_rx_wqe_ll *wqe;
+ struct mlx5_wq_ll *wq;
+ struct sk_buff *skb;
+ u16 cqe_bcnt;
+
+ wi->consumed_strides += cstrides;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ mlx5e_handle_rx_err_cqe(rq, cqe);
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+
+ skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq,
+ mlx5e_skb_from_cqe_mpwrq_linear,
+ mlx5e_skb_from_cqe_mpwrq_nonlinear,
+ mlx5e_xsk_skb_from_cqe_mpwrq_linear,
+ rq, wi, cqe_bcnt, head_offset, page_idx);
+ if (!skb)
+ goto mpwrq_cqe_out;
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (mlx5e_cqe_regb_chain(cqe))
+ if (!mlx5e_tc_update_skb(cqe, skb)) {
+ dev_kfree_skb_any(skb);
+ goto mpwrq_cqe_out;
+ }
+
+ napi_gro_receive(rq->cq.napi, skb);
+
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
+{
+ struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
+ struct mlx5_cqwq *cqwq = &cq->wq;
+ struct mlx5_cqe64 *cqe;
+ int work_done = 0;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return 0;
+
+ if (rq->cqd.left) {
+ work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
+ if (work_done >= budget)
+ goto out;
+ }
+
+ cqe = mlx5_cqwq_get_cqe(cqwq);
+ if (!cqe) {
+ if (unlikely(work_done))
+ goto out;
+ return 0;
+ }
+
+ do {
+ if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
+ work_done +=
+ mlx5e_decompress_cqes_start(rq, cqwq,
+ budget - work_done);
+ continue;
+ }
+
+ mlx5_cqwq_pop(cqwq);
+
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
+ rq, cqe);
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
+
+out:
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
+ mlx5e_shampo_flush_skb(rq, NULL, false);
+
+ if (rcu_access_pointer(rq->xdp_prog))
+ mlx5e_xdp_rx_poll_complete(rq);
+
+ mlx5_cqwq_update_db_record(cqwq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ return work_done;
+}
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+
+#define MLX5_IB_GRH_SGID_OFFSET 8
+#define MLX5_IB_GRH_DGID_OFFSET 24
+#define MLX5_GID_SIZE 16
+
+static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct hwtstamp_config *tstamp;
+ struct mlx5e_rq_stats *stats;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ char *pseudo_header;
+ u32 flags_rqpn;
+ u32 qpn;
+ u8 *dgid;
+ u8 g;
+
+ qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
+ netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);
+
+ /* No mapping present, cannot process SKB. This might happen if a child
+ * interface is going down while there are still unprocessed CQEs on the
+ * parent RQ.
+ */
+ if (unlikely(!netdev)) {
+ /* TODO: add drop counters support */
+ skb->dev = NULL;
+ pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
+ return;
+ }
+
+ priv = mlx5i_epriv(netdev);
+ tstamp = &priv->tstamp;
+ stats = &priv->channel_stats[rq->ix]->rq;
+
+ flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
+ g = (flags_rqpn >> 28) & 3;
+ dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
+ if ((!g) || dgid[0] != 0xff)
+ skb->pkt_type = PACKET_HOST;
+ else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0)
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+
+ /* Drop packets that this interface sent, i.e. multicast packets
+ * that the HCA has replicated.
+ */
+ if (g && (qpn == (flags_rqpn & 0xffffff)) &&
+ (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
+ MLX5_GID_SIZE) == 0)) {
+ skb->dev = NULL;
+ return;
+ }
+
+ skb_pull(skb, MLX5_IB_GRH_BYTES);
+
+ skb->protocol = *((__be16 *)(skb->data));
+
+ if (netdev->features & NETIF_F_RXCSUM) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+ stats->csum_complete++;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ stats->csum_none++;
+ }
+
+ if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
+ skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
+ rq->clock, get_cqe_ts(cqe));
+ skb_record_rx_queue(skb, rq->ix);
+
+ if (likely(netdev->features & NETIF_F_RXHASH))
+ mlx5e_skb_set_hash(cqe, skb);
+
+ /* The 4-byte IPoIB encap header is already present; push a zeroed
+ * 20-byte pseudo header in front of it to form the hard header.
+ */
+ pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
+ memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
+ skb_reset_mac_header(skb);
+ skb_pull(skb, MLX5_IPOIB_HARD_LEN);
+
+ skb->dev = netdev;
+
+ stats->packets++;
+ stats->bytes += cqe_bcnt;
+}
+
+static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ rq->stats->wqe_err++;
+ goto wq_free_wqe;
+ }
+
+ skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ mlx5e_skb_from_cqe_linear,
+ mlx5e_skb_from_cqe_nonlinear,
+ rq, wi, cqe_bcnt);
+ if (!skb)
+ goto wq_free_wqe;
+
+ mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ if (unlikely(!skb->dev)) {
+ dev_kfree_skb_any(skb);
+ goto wq_free_wqe;
+ }
+ napi_gro_receive(rq->cq.napi, skb);
+
+wq_free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+ mlx5_wq_cyc_pop(wq);
+}
+
+const struct mlx5e_rx_handlers mlx5i_rx_handlers = {
+ .handle_rx_cqe = mlx5i_handle_rx_cqe,
+ .handle_rx_cqe_mpwqe = NULL, /* Not supported */
+};
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+
+int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk)
+{
+ struct net_device *netdev = rq->netdev;
+ struct mlx5_core_dev *mdev = rq->mdev;
+ struct mlx5e_priv *priv = rq->priv;
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
+ mlx5e_xsk_skb_from_cqe_mpwrq_linear :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_mpwrq_linear :
+ mlx5e_skb_from_cqe_mpwrq_nonlinear;
+ rq->post_wqes = mlx5e_post_rx_mpwqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
+ return -EINVAL;
+ }
+ } else {
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
+ return -EINVAL;
+ }
+ }
+
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ rq->wqe.skb_from_cqe = xsk ?
+ mlx5e_xsk_skb_from_cqe_linear :
+ mlx5e_rx_is_linear_skb(mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
+ rq->post_wqes = mlx5e_post_rx_wqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of RQ is not set\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5e_priv *priv = netdev_priv(rq->netdev);
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct devlink_port *dl_port;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 trap_id;
+ u16 ci;
+
+ trap_id = get_cqe_flow_tag(cqe);
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ rq->stats->wqe_err++;
+ goto free_wqe;
+ }
+
+ skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt);
+ if (!skb)
+ goto free_wqe;
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ skb_push(skb, ETH_HLEN);
+
+ dl_port = mlx5e_devlink_get_dl_port(priv);
+ mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port);
+ dev_kfree_skb_any(skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, false);
+ mlx5_wq_cyc_pop(wq);
+}
+
+void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params)
+{
+ rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
+ rq->post_wqes = mlx5e_post_rx_wqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
+ rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
new file mode 100644
index 000000000..08a75654f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/udp.h>
+#include "en.h"
+#include "en/port.h"
+#include "eswitch.h"
+
+static int mlx5e_test_health_info(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_health *health = &priv->mdev->priv.health;
+
+ return health->fatal_error ? 1 : 0;
+}
+
+static int mlx5e_test_link_state(struct mlx5e_priv *priv)
+{
+ u8 port_state;
+
+ if (!netif_carrier_ok(priv->netdev))
+ return 1;
+
+ port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0);
+ return port_state == VPORT_STATE_UP ? 0 : 1;
+}
+
+static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
+{
+ u32 speed;
+
+ if (!netif_carrier_ok(priv->netdev))
+ return 1;
+
+ return mlx5e_port_linkspeed(priv->mdev, &speed);
+}
+
+struct mlx5ehdr {
+ __be32 version;
+ __be64 magic;
+};
+
+#ifdef CONFIG_INET
+/* loopback test */
+#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\
+ sizeof(struct udphdr) + sizeof(struct mlx5ehdr))
+#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL
+
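+/* The loopback self-test temporarily enables local loopback on the vport,
+ * transmits a UDP packet to port 9 (the discard protocol) carrying the magic
+ * value above, and uses a packet_type handler to confirm the packet is seen
+ * again on RX within the verification timeout.
+ */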
+static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
+{
+ struct sk_buff *skb = NULL;
+ struct mlx5ehdr *mlxh;
+ struct ethhdr *ethh;
+ struct udphdr *udph;
+ struct iphdr *iph;
+ int iplen;
+
+ skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE);
+ if (!skb) {
+ netdev_err(priv->netdev, "\tFailed to alloc loopback skb\n");
+ return NULL;
+ }
+
+ net_prefetchw(skb->data);
+ skb_reserve(skb, NET_IP_ALIGN);
+
+ /* Reserve for ethernet and IP header */
+ ethh = skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+
+ skb_set_network_header(skb, skb->len);
+ iph = skb_put(skb, sizeof(struct iphdr));
+
+ skb_set_transport_header(skb, skb->len);
+ udph = skb_put(skb, sizeof(struct udphdr));
+
+ /* Fill ETH header */
+ ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr);
+ eth_zero_addr(ethh->h_source);
+ ethh->h_proto = htons(ETH_P_IP);
+
+ /* Fill UDP header */
+ udph->source = htons(9);
+ udph->dest = htons(9); /* Discard Protocol */
+ udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr));
+ udph->check = 0;
+
+ /* Fill IP header */
+ iph->ihl = 5;
+ iph->ttl = 32;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iplen = sizeof(struct iphdr) + sizeof(struct udphdr) +
+ sizeof(struct mlx5ehdr);
+ iph->tot_len = htons(iplen);
+ iph->frag_off = 0;
+ iph->saddr = 0;
+ iph->daddr = 0;
+ iph->tos = 0;
+ iph->id = 0;
+ ip_send_check(iph);
+
+ /* Fill test header and data */
+ mlxh = skb_put(skb, sizeof(*mlxh));
+ mlxh->version = 0;
+ mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC);
+
+ skb->csum = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ udp4_hwcsum(skb, iph->saddr, iph->daddr);
+
+ skb->protocol = htons(ETH_P_IP);
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = priv->netdev;
+
+ return skb;
+}
+
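+/* Per-run loopback test state: the registered packet_type hook, a completion
+ * signalled when the looped-back frame is seen, and the original local
+ * loopback setting so it can be restored on cleanup.
+ */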
+struct mlx5e_lbt_priv {
+ struct packet_type pt;
+ struct completion comp;
+ bool loopback_ok;
+ bool local_lb;
+};
+
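+/* packet_type receive hook: identify our test frame by destination MAC, UDP
+ * destination port and magic payload, then signal the waiting test thread.
+ */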
+static int
+mlx5e_test_loopback_validate(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt,
+ struct net_device *orig_ndev)
+{
+ struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv;
+ struct mlx5ehdr *mlxh;
+ struct ethhdr *ethh;
+ struct udphdr *udph;
+ struct iphdr *iph;
+
+ /* We are only going to peek, no need to clone the SKB */
+ if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
+ goto out;
+
+ ethh = (struct ethhdr *)skb_mac_header(skb);
+ if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr))
+ goto out;
+
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_UDP)
+ goto out;
+
+ /* Don't assume skb_transport_header() was set */
+ udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
+ if (udph->dest != htons(9))
+ goto out;
+
+ mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph));
+ if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC))
+		goto out; /* so close! */
+
+ /* bingo */
+ lbtp->loopback_ok = true;
+ complete(&lbtp->comp);
+out:
+ kfree_skb(skb);
+ return 0;
+}
+
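+/* Enable NIC local loopback if it is not already on, refresh the TIRs for
+ * self-loopback traffic and register the receive hook.
+ */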
+static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
+ struct mlx5e_lbt_priv *lbtp)
+{
+ int err = 0;
+
+ /* Temporarily enable local_lb */
+ err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb);
+ if (err)
+ return err;
+
+ if (!lbtp->local_lb) {
+ err = mlx5_nic_vport_update_local_lb(priv->mdev, true);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_refresh_tirs(priv, true, false);
+ if (err)
+ goto out;
+
+ lbtp->loopback_ok = false;
+ init_completion(&lbtp->comp);
+
+ lbtp->pt.type = htons(ETH_P_IP);
+ lbtp->pt.func = mlx5e_test_loopback_validate;
+ lbtp->pt.dev = priv->netdev;
+ lbtp->pt.af_packet_priv = lbtp;
+ dev_add_pack(&lbtp->pt);
+
+ return 0;
+
+out:
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, false);
+
+ return err;
+}
+
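+/* Undo the setup: restore the original local loopback setting, unregister the
+ * receive hook and refresh the TIRs back to normal operation.
+ */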
+static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
+ struct mlx5e_lbt_priv *lbtp)
+{
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, false);
+
+ dev_remove_pack(&lbtp->pt);
+ mlx5e_refresh_tirs(priv, false, false);
+}
+
+static int mlx5e_cond_loopback(struct mlx5e_priv *priv)
+{
+ if (is_mdev_switchdev_mode(priv->mdev))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
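+/* Run the loopback test: with the device up, transmit the crafted UDP frame on
+ * queue 0 and wait up to MLX5E_LB_VERIFY_TIMEOUT for the receive hook to
+ * confirm that it came back.
+ */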
+#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
+static int mlx5e_test_loopback(struct mlx5e_priv *priv)
+{
+ struct mlx5e_lbt_priv *lbtp;
+ struct sk_buff *skb = NULL;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ netdev_err(priv->netdev,
+ "\tCan't perform loopback test while device is down\n");
+ return -ENODEV;
+ }
+
+ lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL);
+ if (!lbtp)
+ return -ENOMEM;
+ lbtp->loopback_ok = false;
+
+ err = mlx5e_test_loopback_setup(priv, lbtp);
+ if (err)
+ goto out;
+
+ skb = mlx5e_test_get_udp_skb(priv);
+ if (!skb) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ skb_set_queue_mapping(skb, 0);
+ err = dev_queue_xmit(skb);
+ if (err) {
+ netdev_err(priv->netdev,
+ "\tFailed to xmit loopback packet err(%d)\n",
+ err);
+ goto cleanup;
+ }
+
+ wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT);
+ err = !lbtp->loopback_ok;
+
+cleanup:
+ mlx5e_test_loopback_cleanup(priv, lbtp);
+out:
+ kfree(lbtp);
+ return err;
+}
+#endif
+
+typedef int (*mlx5e_st_func)(struct mlx5e_priv *);
+
+struct mlx5e_st {
+ char name[ETH_GSTRING_LEN];
+ mlx5e_st_func st_func;
+ mlx5e_st_func cond_func;
+};
+
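+/* Self-test table: each entry pairs an ethtool string with the test callback
+ * and an optional condition callback that can skip the test on this device.
+ */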
+static struct mlx5e_st mlx5e_sts[] = {
+ { "Link Test", mlx5e_test_link_state },
+ { "Speed Test", mlx5e_test_link_speed },
+ { "Health Test", mlx5e_test_health_info },
+#ifdef CONFIG_INET
+ { "Loopback Test", mlx5e_test_loopback, mlx5e_cond_loopback },
+#endif
+};
+
+#define MLX5E_ST_NUM ARRAY_SIZE(mlx5e_sts)
+
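+/* ethtool self-test entry point: run every applicable test under the state
+ * lock, store the per-test results in @buf and raise ETH_TEST_FL_FAILED if
+ * any of them returned a non-zero value.
+ */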
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf)
+{
+ struct mlx5e_priv *priv = netdev_priv(ndev);
+ int i, count = 0;
+
+ mutex_lock(&priv->state_lock);
+ netdev_info(ndev, "Self test begin..\n");
+
+ for (i = 0; i < MLX5E_ST_NUM; i++) {
+ struct mlx5e_st st = mlx5e_sts[i];
+
+ if (st.cond_func && st.cond_func(priv))
+ continue;
+ netdev_info(ndev, "\t[%d] %s start..\n", i, st.name);
+ buf[count] = st.st_func(priv);
+ netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]);
+ count++;
+ }
+
+ mutex_unlock(&priv->state_lock);
+
+ for (i = 0; i < count; i++) {
+ if (buf[i]) {
+ etest->flags |= ETH_TEST_FL_FAILED;
+ break;
+ }
+ }
+ netdev_info(ndev, "Self test out: status flags(0x%x)\n",
+ etest->flags);
+}
+
+int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data)
+{
+ int i, count = 0;
+
+ for (i = 0; i < MLX5E_ST_NUM; i++) {
+ struct mlx5e_st st = mlx5e_sts[i];
+
+ if (st.cond_func && st.cond_func(priv))
+ continue;
+ if (data)
+ strcpy(data + count * ETH_GSTRING_LEN, st.name);
+ count++;
+ }
+ return count;
+}
+
+int mlx5e_self_test_num(struct mlx5e_priv *priv)
+{
+ return mlx5e_self_test_fill_strings(priv, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
new file mode 100644
index 000000000..f7f54550a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -0,0 +1,2482 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "lib/mlx5.h"
+#include "en.h"
+#include "en_accel/ktls.h"
+#include "en_accel/en_accel.h"
+#include "en/ptp.h"
+#include "en/port.h"
+
+#ifdef CONFIG_PAGE_POOL_STATS
+#include <net/page_pool.h>
+#endif
+
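+/* Ethtool statistics are organized in groups provided by the netdev profile;
+ * the helpers below iterate the group array to count, name, update and fill
+ * the counters exposed to user space.
+ */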
+static unsigned int stats_grps_num(struct mlx5e_priv *priv)
+{
+ return !priv->profile->stats_grps_num ? 0 :
+ priv->profile->stats_grps_num(priv);
+}
+
+unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv)
+{
+ mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+ const unsigned int num_stats_grps = stats_grps_num(priv);
+ unsigned int total = 0;
+ int i;
+
+ for (i = 0; i < num_stats_grps; i++)
+ total += stats_grps[i]->get_num_stats(priv);
+
+ return total;
+}
+
+void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv)
+{
+ mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+ const unsigned int num_stats_grps = stats_grps_num(priv);
+ int i;
+
+ for (i = num_stats_grps - 1; i >= 0; i--)
+ if (stats_grps[i]->update_stats &&
+ stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS)
+ stats_grps[i]->update_stats(priv);
+}
+
+void mlx5e_stats_update(struct mlx5e_priv *priv)
+{
+ mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+ const unsigned int num_stats_grps = stats_grps_num(priv);
+ int i;
+
+ for (i = num_stats_grps - 1; i >= 0; i--)
+ if (stats_grps[i]->update_stats)
+ stats_grps[i]->update_stats(priv);
+}
+
+void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+ const unsigned int num_stats_grps = stats_grps_num(priv);
+ int i;
+
+ for (i = 0; i < num_stats_grps; i++)
+ idx = stats_grps[i]->fill_stats(priv, data, idx);
+}
+
+void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data)
+{
+ mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+ const unsigned int num_stats_grps = stats_grps_num(priv);
+ int i, idx = 0;
+
+ for (i = 0; i < num_stats_grps; i++)
+ idx = stats_grps[i]->fill_strings(priv, data, idx);
+}
+
+/* Concrete NIC Stats */
+
+static const struct counter_desc sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) },
+
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) },
+#endif
+
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_match_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
+#ifdef CONFIG_PAGE_POOL_STATS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) },
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_skip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_ok) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_retry) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_skip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_err) },
+#endif
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) },
+};
+
+#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw)
+{
+ return NUM_SW_COUNTERS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw)
+{
+ int i;
+
+ for (i = 0; i < NUM_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw)
+{
+ int i;
+
+ for (i = 0; i < NUM_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_xdp_red(struct mlx5e_sw_stats *s,
+ struct mlx5e_xdpsq_stats *xdpsq_red_stats)
+{
+ s->tx_xdp_xmit += xdpsq_red_stats->xmit;
+ s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe;
+ s->tx_xdp_inlnw += xdpsq_red_stats->inlnw;
+ s->tx_xdp_nops += xdpsq_red_stats->nops;
+ s->tx_xdp_full += xdpsq_red_stats->full;
+ s->tx_xdp_err += xdpsq_red_stats->err;
+ s->tx_xdp_cqes += xdpsq_red_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_xdpsq(struct mlx5e_sw_stats *s,
+ struct mlx5e_xdpsq_stats *xdpsq_stats)
+{
+ s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
+ s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe;
+ s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw;
+ s->rx_xdp_tx_nops += xdpsq_stats->nops;
+ s->rx_xdp_tx_full += xdpsq_stats->full;
+ s->rx_xdp_tx_err += xdpsq_stats->err;
+ s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_xsksq(struct mlx5e_sw_stats *s,
+ struct mlx5e_xdpsq_stats *xsksq_stats)
+{
+ s->tx_xsk_xmit += xsksq_stats->xmit;
+ s->tx_xsk_mpwqe += xsksq_stats->mpwqe;
+ s->tx_xsk_inlnw += xsksq_stats->inlnw;
+ s->tx_xsk_full += xsksq_stats->full;
+ s->tx_xsk_err += xsksq_stats->err;
+ s->tx_xsk_cqes += xsksq_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_xskrq(struct mlx5e_sw_stats *s,
+ struct mlx5e_rq_stats *xskrq_stats)
+{
+ s->rx_xsk_packets += xskrq_stats->packets;
+ s->rx_xsk_bytes += xskrq_stats->bytes;
+ s->rx_xsk_csum_complete += xskrq_stats->csum_complete;
+ s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary;
+ s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner;
+ s->rx_xsk_csum_none += xskrq_stats->csum_none;
+ s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark;
+ s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets;
+ s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop;
+ s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect;
+ s->rx_xsk_wqe_err += xskrq_stats->wqe_err;
+ s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes;
+ s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides;
+ s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop;
+ s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err;
+ s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks;
+ s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts;
+ s->rx_xsk_congst_umr += xskrq_stats->congst_umr;
+ s->rx_xsk_arfs_err += xskrq_stats->arfs_err;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
+ struct mlx5e_rq_stats *rq_stats)
+{
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+ s->rx_lro_packets += rq_stats->lro_packets;
+ s->rx_lro_bytes += rq_stats->lro_bytes;
+ s->rx_gro_packets += rq_stats->gro_packets;
+ s->rx_gro_bytes += rq_stats->gro_bytes;
+ s->rx_gro_skbs += rq_stats->gro_skbs;
+ s->rx_gro_match_packets += rq_stats->gro_match_packets;
+ s->rx_gro_large_hds += rq_stats->gro_large_hds;
+ s->rx_ecn_mark += rq_stats->ecn_mark;
+ s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
+ s->rx_csum_none += rq_stats->csum_none;
+ s->rx_csum_complete += rq_stats->csum_complete;
+ s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
+ s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
+ s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+ s->rx_xdp_drop += rq_stats->xdp_drop;
+ s->rx_xdp_redirect += rq_stats->xdp_redirect;
+ s->rx_wqe_err += rq_stats->wqe_err;
+ s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes;
+ s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
+ s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop;
+ s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
+ s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+ s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+ s->rx_cache_reuse += rq_stats->cache_reuse;
+ s->rx_cache_full += rq_stats->cache_full;
+ s->rx_cache_empty += rq_stats->cache_empty;
+ s->rx_cache_busy += rq_stats->cache_busy;
+ s->rx_cache_waive += rq_stats->cache_waive;
+ s->rx_congst_umr += rq_stats->congst_umr;
+ s->rx_arfs_err += rq_stats->arfs_err;
+ s->rx_recover += rq_stats->recover;
+#ifdef CONFIG_PAGE_POOL_STATS
+ s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast;
+ s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow;
+ s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty;
+ s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill;
+ s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive;
+ s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order;
+ s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached;
+ s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full;
+ s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring;
+ s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full;
+ s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets;
+ s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes;
+ s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt;
+ s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start;
+ s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end;
+ s->rx_tls_resync_req_skip += rq_stats->tls_resync_req_skip;
+ s->rx_tls_resync_res_ok += rq_stats->tls_resync_res_ok;
+ s->rx_tls_resync_res_retry += rq_stats->tls_resync_res_retry;
+ s->rx_tls_resync_res_skip += rq_stats->tls_resync_res_skip;
+ s->rx_tls_err += rq_stats->tls_err;
+#endif
+}
+
+static void mlx5e_stats_grp_sw_update_stats_ch_stats(struct mlx5e_sw_stats *s,
+ struct mlx5e_ch_stats *ch_stats)
+{
+ s->ch_events += ch_stats->events;
+ s->ch_poll += ch_stats->poll;
+ s->ch_arm += ch_stats->arm;
+ s->ch_aff_change += ch_stats->aff_change;
+ s->ch_force_irq += ch_stats->force_irq;
+ s->ch_eq_rearm += ch_stats->eq_rearm;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_sq(struct mlx5e_sw_stats *s,
+ struct mlx5e_sq_stats *sq_stats)
+{
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_tso_packets += sq_stats->tso_packets;
+ s->tx_tso_bytes += sq_stats->tso_bytes;
+ s->tx_tso_inner_packets += sq_stats->tso_inner_packets;
+ s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
+ s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
+ s->tx_nop += sq_stats->nop;
+ s->tx_mpwqe_blks += sq_stats->mpwqe_blks;
+ s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts;
+ s->tx_queue_stopped += sq_stats->stopped;
+ s->tx_queue_wake += sq_stats->wake;
+ s->tx_queue_dropped += sq_stats->dropped;
+ s->tx_cqe_err += sq_stats->cqe_err;
+ s->tx_recover += sq_stats->recover;
+ s->tx_xmit_more += sq_stats->xmit_more;
+ s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+ s->tx_csum_none += sq_stats->csum_none;
+ s->tx_csum_partial += sq_stats->csum_partial;
+#ifdef CONFIG_MLX5_EN_TLS
+ s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets;
+ s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes;
+ s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes;
+ s->tx_tls_dump_packets += sq_stats->tls_dump_packets;
+ s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+ s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data;
+ s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
+ s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req;
+#endif
+ s->tx_cqes += sq_stats->cqes;
+}
+
+static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv,
+ struct mlx5e_sw_stats *s)
+{
+ int i;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return;
+
+ mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch);
+
+ if (priv->tx_ptp_opened) {
+ for (i = 0; i < priv->max_opened_tc; i++) {
+ mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]);
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+ }
+ if (priv->rx_ptp_opened) {
+ mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq);
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+}
+
+static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv,
+ struct mlx5e_sw_stats *s)
+{
+ struct mlx5e_sq_stats **stats;
+ u16 max_qos_sqs;
+ int i;
+
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ stats = READ_ONCE(priv->htb_qos_sq_stats);
+
+ for (i = 0; i < max_qos_sqs; i++) {
+ mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i]));
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+}
+
+#ifdef CONFIG_PAGE_POOL_STATS
+static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c)
+{
+ struct mlx5e_rq_stats *rq_stats = c->rq.stats;
+ struct page_pool *pool = c->rq.page_pool;
+ struct page_pool_stats stats = { 0 };
+
+ if (!page_pool_get_stats(pool, &stats))
+ return;
+
+ rq_stats->pp_alloc_fast = stats.alloc_stats.fast;
+ rq_stats->pp_alloc_slow = stats.alloc_stats.slow;
+ rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order;
+ rq_stats->pp_alloc_empty = stats.alloc_stats.empty;
+ rq_stats->pp_alloc_waive = stats.alloc_stats.waive;
+ rq_stats->pp_alloc_refill = stats.alloc_stats.refill;
+
+ rq_stats->pp_recycle_cached = stats.recycle_stats.cached;
+ rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full;
+ rq_stats->pp_recycle_ring = stats.recycle_stats.ring;
+ rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full;
+ rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt;
+}
+#else
+static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c)
+{
+}
+#endif
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
+{
+ struct mlx5e_sw_stats *s = &priv->stats.sw;
+ int i;
+
+ memset(s, 0, sizeof(*s));
+
+ for (i = 0; i < priv->channels.num; i++) /* for active channels only */
+ mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]);
+
+ for (i = 0; i < priv->stats_nch; i++) {
+ struct mlx5e_channel_stats *channel_stats =
+ priv->channel_stats[i];
+
+ int j;
+
+ mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq);
+ mlx5e_stats_grp_sw_update_stats_xdpsq(s, &channel_stats->rq_xdpsq);
+ mlx5e_stats_grp_sw_update_stats_ch_stats(s, &channel_stats->ch);
+ /* xdp redirect */
+ mlx5e_stats_grp_sw_update_stats_xdp_red(s, &channel_stats->xdpsq);
+ /* AF_XDP zero-copy */
+ mlx5e_stats_grp_sw_update_stats_xskrq(s, &channel_stats->xskrq);
+ mlx5e_stats_grp_sw_update_stats_xsksq(s, &channel_stats->xsksq);
+
+ for (j = 0; j < priv->max_opened_tc; j++) {
+ mlx5e_stats_grp_sw_update_stats_sq(s, &channel_stats->sq[j]);
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+ }
+ mlx5e_stats_grp_sw_update_stats_ptp(priv, s);
+ mlx5e_stats_grp_sw_update_stats_qos(priv, s);
+}
+
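+/* Device queue counters read with QUERY_Q_COUNTER: out-of-buffer events on
+ * the regular RQs, and packets dropped by the drop RQ while the interface is
+ * down.
+ */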
+static const struct counter_desc q_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
+};
+
+static const struct counter_desc drop_rq_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) },
+};
+
+#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc)
+#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qcnt)
+{
+ int num_stats = 0;
+
+ if (priv->q_counter)
+ num_stats += NUM_Q_COUNTERS;
+
+ if (priv->drop_rq_q_counter)
+ num_stats += NUM_DROP_RQ_COUNTERS;
+
+ return num_stats;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt)
+{
+ int i;
+
+ for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ q_stats_desc[i].format);
+
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ drop_rq_stats_desc[i].format);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt)
+{
+ int i;
+
+ for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ q_stats_desc, i);
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ drop_rq_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt)
+{
+ struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ int ret;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+
+ if (priv->q_counter) {
+ MLX5_SET(query_q_counter_in, in, counter_set_id,
+ priv->q_counter);
+ ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out);
+ if (!ret)
+ qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ }
+
+ if (priv->drop_rq_q_counter) {
+ MLX5_SET(query_q_counter_in, in, counter_set_id,
+ priv->drop_rq_q_counter);
+ ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out);
+ if (!ret)
+ qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ }
+}
+
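+/* vNIC environment counters read with the QUERY_VNIC_ENV command; each
+ * counter is exposed only when the corresponding capability bit is set.
+ */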
+#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
+static const struct counter_desc vnic_env_stats_steer_desc[] = {
+ { "rx_steer_missed_packets",
+ VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) },
+};
+
+static const struct counter_desc vnic_env_stats_dev_oob_desc[] = {
+ { "dev_internal_queue_oob",
+ VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) },
+};
+
+static const struct counter_desc vnic_env_stats_drop_desc[] = {
+ { "rx_oversize_pkts_buffer",
+ VNIC_ENV_OFF(vport_env.eth_wqe_too_small) },
+};
+
+#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \
+ (MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \
+ ARRAY_SIZE(vnic_env_stats_steer_desc) : 0)
+#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \
+ (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \
+ ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0)
+#define NUM_VNIC_ENV_DROP_COUNTERS(dev) \
+ (MLX5_CAP_GEN(dev, eth_wqe_too_small) ? \
+ ARRAY_SIZE(vnic_env_stats_drop_desc) : 0)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env)
+{
+ return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) +
+ NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev) +
+ NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env)
+{
+ int i;
+
+ for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_steer_desc[i].format);
+
+ for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_dev_oob_desc[i].format);
+
+ for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_drop_desc[i].format);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env)
+{
+ int i;
+
+ for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_steer_desc, i);
+
+ for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
+ data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_dev_oob_desc, i);
+
+ for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++)
+ data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_drop_desc, i);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env)
+{
+ u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out;
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!mlx5e_stats_grp_vnic_env_num_stats(priv))
+ return;
+
+ MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+ mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out);
+}
+
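+/* Per-vport Ethernet and RDMA traffic counters, read with the
+ * QUERY_VPORT_COUNTER command.
+ */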
+#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
+static const struct counter_desc vport_stats_desc[] = {
+ { "rx_vport_unicast_packets",
+ VPORT_COUNTER_OFF(received_eth_unicast.packets) },
+ { "rx_vport_unicast_bytes",
+ VPORT_COUNTER_OFF(received_eth_unicast.octets) },
+ { "tx_vport_unicast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) },
+ { "tx_vport_unicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) },
+ { "rx_vport_multicast_packets",
+ VPORT_COUNTER_OFF(received_eth_multicast.packets) },
+ { "rx_vport_multicast_bytes",
+ VPORT_COUNTER_OFF(received_eth_multicast.octets) },
+ { "tx_vport_multicast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) },
+ { "tx_vport_multicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) },
+ { "rx_vport_broadcast_packets",
+ VPORT_COUNTER_OFF(received_eth_broadcast.packets) },
+ { "rx_vport_broadcast_bytes",
+ VPORT_COUNTER_OFF(received_eth_broadcast.octets) },
+ { "tx_vport_broadcast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
+ { "tx_vport_broadcast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
+ { "rx_vport_rdma_unicast_packets",
+ VPORT_COUNTER_OFF(received_ib_unicast.packets) },
+ { "rx_vport_rdma_unicast_bytes",
+ VPORT_COUNTER_OFF(received_ib_unicast.octets) },
+ { "tx_vport_rdma_unicast_packets",
+ VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) },
+ { "tx_vport_rdma_unicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) },
+ { "rx_vport_rdma_multicast_packets",
+ VPORT_COUNTER_OFF(received_ib_multicast.packets) },
+ { "rx_vport_rdma_multicast_bytes",
+ VPORT_COUNTER_OFF(received_ib_multicast.octets) },
+ { "tx_vport_rdma_multicast_packets",
+ VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) },
+ { "tx_vport_rdma_multicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) },
+};
+
+#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport)
+{
+ return NUM_VPORT_COUNTERS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out,
+ vport_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport)
+{
+ u32 *out = (u32 *)priv->stats.vport.query_vport_out;
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ mlx5_cmd_exec_inout(mdev, query_vport_counter, in, out);
+}
+
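+/* Physical port counters from the PPCNT register, IEEE 802.3 group. */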
+#define PPORT_802_3_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_802_3_stats_desc[] = {
+ { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+ { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
+ { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+ { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+ { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
+ { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+ { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+ { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+ { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+ { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
+ { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
+ { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
+ { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+ { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+ { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
+ { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+ { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+ { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+};
+
+#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(802_3)
+{
+ return NUM_PPORT_802_3_COUNTERS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(802_3)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(802_3)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters,
+ pport_802_3_stats_desc, i);
+ return idx;
+}
+
+#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \
+ (MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1)
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->IEEE_802_3_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
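+/* Extract a 64-bit big-endian counter from a raw PPCNT layout by field name;
+ * used by the standard ethtool stats callbacks below.
+ */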
+#define MLX5E_READ_CTR64_BE_F(ptr, set, c) \
+ be64_to_cpu(*(__be64 *)((char *)ptr + \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.set.c##_high)))
+
+static int mlx5e_stats_get_ieee(struct mlx5_core_dev *mdev,
+ u32 *ppcnt_ieee_802_3)
+{
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ return mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3,
+ sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
+ struct ethtool_pause_stats *pause_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+ pause_stats->tx_pause_frames =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_pause_mac_ctrl_frames_transmitted);
+ pause_stats->rx_pause_frames =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_pause_mac_ctrl_frames_received);
+}
+
+void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+ phy_stats->SymbolErrorDuringCarrier =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_symbol_error_during_carrier);
+}
+
+void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+#define RD(name) \
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, \
+ eth_802_3_cntrs_grp_data_layout, \
+ name)
+
+ mac_stats->FramesTransmittedOK = RD(a_frames_transmitted_ok);
+ mac_stats->FramesReceivedOK = RD(a_frames_received_ok);
+ mac_stats->FrameCheckSequenceErrors = RD(a_frame_check_sequence_errors);
+ mac_stats->OctetsTransmittedOK = RD(a_octets_transmitted_ok);
+ mac_stats->OctetsReceivedOK = RD(a_octets_received_ok);
+ mac_stats->MulticastFramesXmittedOK = RD(a_multicast_frames_xmitted_ok);
+ mac_stats->BroadcastFramesXmittedOK = RD(a_broadcast_frames_xmitted_ok);
+ mac_stats->MulticastFramesReceivedOK = RD(a_multicast_frames_received_ok);
+ mac_stats->BroadcastFramesReceivedOK = RD(a_broadcast_frames_received_ok);
+ mac_stats->InRangeLengthErrors = RD(a_in_range_length_errors);
+ mac_stats->OutOfRangeLengthField = RD(a_out_of_range_length_field);
+ mac_stats->FrameTooLongErrors = RD(a_frame_too_long_errors);
+#undef RD
+}
+
+void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+ return;
+
+ ctrl_stats->MACControlFramesTransmitted =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_mac_control_frames_transmitted);
+ ctrl_stats->MACControlFramesReceived =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_mac_control_frames_received);
+ ctrl_stats->UnsupportedOpcodesReceived =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ eth_802_3_cntrs_grp_data_layout,
+ a_unsupported_opcodes_received);
+}
+
+#define PPORT_2863_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2863_stats_desc[] = {
+ { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
+ { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
+ { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
+};
+
+#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2863)
+{
+ return NUM_PPORT_2863_COUNTERS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2863)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2863)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters,
+ pport_2863_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2863)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->RFC_2863_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_2819_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2819_stats_desc[] = {
+ { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+ { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
+ { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
+ { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
+ { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+ { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+ { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+ { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+ { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+ { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+ { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+ { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+ { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+};
+
+#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2819)
+{
+ return NUM_PPORT_2819_COUNTERS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2819)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2819)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
+ pport_2819_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->RFC_2819_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
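+/* RMON histogram buckets matching the RFC 2819 packet-size counters reported
+ * by the device.
+ */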
+static const struct ethtool_rmon_hist_range mlx5e_rmon_ranges[] = {
+ { 0, 64 },
+ { 65, 127 },
+ { 128, 255 },
+ { 256, 511 },
+ { 512, 1023 },
+ { 1024, 1518 },
+ { 1519, 2047 },
+ { 2048, 4095 },
+ { 4096, 8191 },
+ { 8192, 10239 },
+ {}
+};
+
+void mlx5e_stats_rmon_get(struct mlx5e_priv *priv,
+ struct ethtool_rmon_stats *rmon,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ u32 ppcnt_RFC_2819_counters[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, ppcnt_RFC_2819_counters,
+ sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+#define RD(name) \
+ MLX5E_READ_CTR64_BE_F(ppcnt_RFC_2819_counters, \
+ eth_2819_cntrs_grp_data_layout, \
+ name)
+
+ rmon->undersize_pkts = RD(ether_stats_undersize_pkts);
+ rmon->fragments = RD(ether_stats_fragments);
+ rmon->jabbers = RD(ether_stats_jabbers);
+
+ rmon->hist[0] = RD(ether_stats_pkts64octets);
+ rmon->hist[1] = RD(ether_stats_pkts65to127octets);
+ rmon->hist[2] = RD(ether_stats_pkts128to255octets);
+ rmon->hist[3] = RD(ether_stats_pkts256to511octets);
+ rmon->hist[4] = RD(ether_stats_pkts512to1023octets);
+ rmon->hist[5] = RD(ether_stats_pkts1024to1518octets);
+ rmon->hist[6] = RD(ether_stats_pkts1519to2047octets);
+ rmon->hist[7] = RD(ether_stats_pkts2048to4095octets);
+ rmon->hist[8] = RD(ether_stats_pkts4096to8191octets);
+ rmon->hist[9] = RD(ether_stats_pkts8192to10239octets);
+#undef RD
+
+ *ranges = mlx5e_rmon_ranges;
+}
+
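+/* Physical layer counters: link_down_events is always reported; the
+ * statistical group (symbol errors, corrected bits, optionally per lane) is
+ * exposed only when the PCAM capability bits are present.
+ */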
+#define PPORT_PHY_STATISTICAL_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.phys_layer_statistical_cntrs.c##_high)
+static const struct counter_desc pport_phy_statistical_stats_desc[] = {
+ { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) },
+ { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
+};
+
+static const struct counter_desc
+pport_phy_statistical_err_lanes_stats_desc[] = {
+ { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) },
+ { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) },
+ { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) },
+ { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) },
+};
+
+#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \
+ ARRAY_SIZE(pport_phy_statistical_stats_desc)
+#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \
+ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int num_stats;
+
+ /* "1" for link_down_events special counter */
+ num_stats = 1;
+
+ num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ?
+ NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0;
+
+ num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ?
+ NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0;
+
+ return num_stats;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int i;
+
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy");
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
+ return idx;
+
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_phy_statistical_stats_desc[i].format);
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_phy_statistical_err_lanes_stats_desc[i].format);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int i;
+
+ /* link_down_events_phy has special handling since it is not stored in __be64 format */
+ data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters,
+ counter_set.phys_layer_cntrs.link_down_events);
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
+ return idx;
+
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+ pport_phy_statistical_stats_desc, i);
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+ pport_phy_statistical_err_lanes_stats_desc,
+ i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->phy_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
+ return;
+
+ out = pstats->phy_statistical_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
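+/* FEC statistics: the active FEC mode selects whether Reed-Solomon block
+ * totals or per-lane Firecode block counters are reported to ethtool.
+ */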
+static int fec_num_lanes(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return 0;
+
+ return MLX5_GET(pmlp_reg, out, width);
+}
+
+static int fec_active_mode(struct mlx5_core_dev *mdev)
+{
+ unsigned long fec_active_long;
+ u32 fec_active;
+
+ if (mlx5e_get_fec_mode(mdev, &fec_active, NULL))
+ return MLX5E_FEC_NOFEC;
+
+ fec_active_long = fec_active;
+ return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE);
+}
+
+#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \
+ fec_stats->corrected_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_corrected_blocks_lane##idx); \
+ fec_stats->uncorrectable_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_uncorrectable_blocks_lane##idx); \
+})
+
+static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats,
+ u32 *ppcnt, u8 lanes)
+{
+ if (lanes > 3) { /* 4 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(3);
+ MLX5E_STATS_SET_FEC_BLOCK(2);
+ }
+ if (lanes > 1) /* 2 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(1);
+ if (lanes > 0) /* 1 lane */
+ MLX5E_STATS_SET_FEC_BLOCK(0);
+}
+
+static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt)
+{
+ fec_stats->corrected_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_corrected_blocks);
+ fec_stats->uncorrectable_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_uncorrectable_blocks);
+}
+
+static void fec_set_block_stats(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int mode = fec_active_mode(mdev);
+
+ if (mode == MLX5E_FEC_NOFEC)
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+ switch (mode) {
+ case MLX5E_FEC_RS_528_514:
+ case MLX5E_FEC_RS_544_514:
+ case MLX5E_FEC_LLRS_272_257_1:
+ fec_set_rs_stats(fec_stats, out);
+ return;
+ case MLX5E_FEC_FIRECODE:
+ fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev));
+ }
+}
+
+static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical,
+ sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+ fec_stats->corrected_bits.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt_phy_statistical,
+ phys_layer_statistical_cntrs,
+ phy_corrected_bits);
+}
+
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group))
+ return;
+
+ fec_set_corrected_bits_total(priv, fec_stats);
+ fec_set_block_stats(priv, fec_stats);
+}
+
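+/* Ethernet extended counters (rx buffer almost full), present only when the
+ * rx_buffer_fullness_counters capability is set.
+ */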
+#define PPORT_ETH_EXT_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_eth_ext_stats_desc[] = {
+ { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) },
+};
+
+#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(eth_ext)
+{
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ return NUM_PPORT_ETH_EXT_COUNTERS;
+
+ return 0;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(eth_ext)
+{
+ int i;
+
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_eth_ext_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(eth_ext)
+{
+ int i;
+
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters,
+ pport_eth_ext_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(eth_ext)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->eth_ext_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PCIE_PERF_OFF(c) \
+ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
+static const struct counter_desc pcie_perf_stats_desc[] = {
+ { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
+ { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
+};
+
+#define PCIE_PERF_OFF64(c) \
+ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pcie_perf_stats_desc64[] = {
+ { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) },
+};
+
+static const struct counter_desc pcie_perf_stall_stats_desc[] = {
+ { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) },
+ { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) },
+ { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) },
+ { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) },
+};
+
+#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc)
+#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64)
+#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie)
+{
+ int num_stats = 0;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ num_stats += NUM_PCIE_PERF_COUNTERS;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ num_stats += NUM_PCIE_PERF_COUNTERS64;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ num_stats += NUM_PCIE_PERF_STALL_COUNTERS;
+
+ return num_stats;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie)
+{
+ int i;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stats_desc[i].format);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stats_desc64[i].format);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stall_stats_desc[i].format);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie)
+{
+ int i;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stats_desc, i);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stats_desc64, i);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stall_stats_desc, i);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie)
+{
+ struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
+ void *out;
+
+ if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group))
+ return;
+
+ out = pcie_stats->pcie_perf_counters;
+ MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
+}
+
+#define PPORT_PER_TC_PRIO_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_per_tc_prio_grp_data_layout.c##_high)
+
+static const struct counter_desc pport_per_tc_prio_stats_desc[] = {
+ { "rx_prio%d_buf_discard", PPORT_PER_TC_PRIO_OFF(no_buffer_discard_uc) },
+};
+
+#define NUM_PPORT_PER_TC_PRIO_COUNTERS ARRAY_SIZE(pport_per_tc_prio_stats_desc)
+
+#define PPORT_PER_TC_CONGEST_PRIO_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_per_tc_congest_prio_grp_data_layout.c##_high)
+
+static const struct counter_desc pport_per_tc_congest_prio_stats_desc[] = {
+ { "rx_prio%d_cong_discard", PPORT_PER_TC_CONGEST_PRIO_OFF(wred_discard) },
+ { "rx_prio%d_marked", PPORT_PER_TC_CONGEST_PRIO_OFF(ecn_marked_tc) },
+};
+
+#define NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS \
+ ARRAY_SIZE(pport_per_tc_congest_prio_stats_desc)
+
+static int mlx5e_grp_per_tc_prio_get_num_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return 0;
+
+ return NUM_PPORT_PER_TC_PRIO_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_port_buff_congest)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int i, prio;
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return idx;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_tc_prio_stats_desc[i].format, prio);
+ for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_tc_congest_prio_stats_desc[i].format, prio);
+ }
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_port_buff_congest)
+{
+ struct mlx5e_pport_stats *pport = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int i, prio;
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return idx;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&pport->per_tc_prio_counters[prio],
+ pport_per_tc_prio_stats_desc, i);
+		for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio],
+ pport_per_tc_congest_prio_stats_desc, i);
+ }
+
+ return idx;
+}
+
+static void mlx5e_grp_per_tc_prio_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+ int prio;
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, pnat, 2);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP);
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ out = pstats->per_tc_prio_counters[prio];
+ MLX5_SET(ppcnt_reg, in, prio_tc, prio);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ }
+}
+
+static int mlx5e_grp_per_tc_congest_prio_get_num_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return 0;
+
+ return NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static void mlx5e_grp_per_tc_congest_prio_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+ int prio;
+
+ if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, pnat, 2);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP);
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ out = pstats->per_tc_congest_prio_counters[prio];
+ MLX5_SET(ppcnt_reg, in, prio_tc, prio);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ }
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_port_buff_congest)
+{
+ return mlx5e_grp_per_tc_prio_get_num_stats(priv) +
+ mlx5e_grp_per_tc_congest_prio_get_num_stats(priv);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_port_buff_congest)
+{
+ mlx5e_grp_per_tc_prio_update_stats(priv);
+ mlx5e_grp_per_tc_congest_prio_update_stats(priv);
+}
+
+#define PPORT_PER_PRIO_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_per_prio_grp_data_layout.c##_high)
+static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
+ { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
+ { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
+ { "rx_prio%d_discards", PPORT_PER_PRIO_OFF(rx_discards) },
+ { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
+ { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
+};
+
+#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
+
+static int mlx5e_grp_per_prio_traffic_get_num_stats(void)
+{
+ return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv,
+ u8 *data,
+ int idx)
+{
+ int i, prio;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_traffic_stats_desc[i].format, prio);
+ }
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv,
+ u64 *data,
+ int idx)
+{
+ int i, prio;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+ pport_per_prio_traffic_stats_desc, i);
+ }
+
+ return idx;
+}
+
+static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
+ /* %s is "global" or "prio{i}" */
+ { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+ { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+ { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+ { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+ { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
+};
+
+static const struct counter_desc pport_pfc_stall_stats_desc[] = {
+ { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+ { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
+};
+
+#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
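+/* The stall counters are exposed only when both the pfcc_mask PCAM bit and
+ * the stall_detect debug capability are set; otherwise this evaluates to 0.
+ */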
+#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \
+ MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \
+ MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+
+static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 pfc_en_tx;
+ u8 pfc_en_rx;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return 0;
+
+ err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx);
+
+ return err ? 0 : pfc_en_tx | pfc_en_rx;
+}
+
+static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 rx_pause;
+ u32 tx_pause;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return false;
+
+ err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+ return err ? false : rx_pause | tx_pause;
+}
+
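+/* One set of PFC counters is reported per priority with PFC enabled, plus a
+ * "global" set when global pause is enabled, plus the stall counters.
+ */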
+static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv)
+{
+ return (mlx5e_query_global_pause_combined(priv) +
+ hweight8(mlx5e_query_pfc_combined(priv))) *
+ NUM_PPORT_PER_PRIO_PFC_COUNTERS +
+ NUM_PPORT_PFC_STALL_COUNTERS(priv);
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
+ u8 *data,
+ int idx)
+{
+ unsigned long pfc_combined;
+ int i, prio;
+
+ pfc_combined = mlx5e_query_pfc_combined(priv);
+ for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ char pfc_string[ETH_GSTRING_LEN];
+
+ snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio);
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_pfc_stats_desc[i].format, pfc_string);
+ }
+ }
+
+ if (mlx5e_query_global_pause_combined(priv)) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_pfc_stats_desc[i].format, "global");
+ }
+ }
+
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_pfc_stall_stats_desc[i].format);
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv,
+ u64 *data,
+ int idx)
+{
+ unsigned long pfc_combined;
+ int i, prio;
+
+ pfc_combined = mlx5e_query_pfc_combined(priv);
+ for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+ pport_per_prio_pfc_stats_desc, i);
+ }
+ }
+
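+	/* Global pause counters are read from the prio 0 per-priority buffer. */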
+ if (mlx5e_query_global_pause_combined(priv)) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ pport_per_prio_pfc_stats_desc, i);
+ }
+ }
+
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ pport_pfc_stall_stats_desc, i);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_prio)
+{
+ return mlx5e_grp_per_prio_traffic_get_num_stats() +
+ mlx5e_grp_per_prio_pfc_get_num_stats(priv);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_prio)
+{
+ idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx);
+ idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_prio)
+{
+ idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx);
+ idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx);
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_prio)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int prio;
+ void *out;
+
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ out = pstats->per_prio_counters[prio];
+ MLX5_SET(ppcnt_reg, in, prio_tc, prio);
+ mlx5_core_access_reg(mdev, in, sz, out, sz,
+ MLX5_REG_PPCNT, 0, 0);
+ }
+}
+
+static const struct counter_desc mlx5e_pme_status_desc[] = {
+ { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
+};
+
+static const struct counter_desc mlx5e_pme_error_desc[] = {
+ { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
+ { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
+ { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
+};
+
+#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc)
+#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pme)
+{
+ return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pme)
+{
+ int i;
+
+ for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format);
+
+ for (i = 0; i < NUM_PME_ERR_STATS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme)
+{
+ struct mlx5_pme_stats pme_stats;
+ int i;
+
+ mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
+ for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
+ mlx5e_pme_status_desc, i);
+
+ for (i = 0; i < NUM_PME_ERR_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
+ mlx5e_pme_error_desc, i);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; }
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls)
+{
+ return mlx5e_ktls_get_count(priv);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls)
+{
+ return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls)
+{
+ return idx + mlx5e_ktls_get_stats(priv, data + idx);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; }
+
+static const struct counter_desc rq_stats_desc[] = {
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_skbs) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_match_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
+#ifdef CONFIG_PAGE_POOL_STATS
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) },
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_skip) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_ok) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_retry) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_skip) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_err) },
+#endif
+};
+
+static const struct counter_desc sq_stats_desc[] = {
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
+#endif
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
+static const struct counter_desc rq_xdpsq_stats_desc[] = {
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc xdpsq_stats_desc[] = {
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc xskrq_stats_desc[] = {
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) },
+};
+
+static const struct counter_desc xsksq_stats_desc[] = {
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc ch_stats_desc[] = {
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
+};
+
+static const struct counter_desc ptp_sq_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, packets) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, bytes) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, stopped) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, dropped) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, recover) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqes) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, wake) },
+ { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
+static const struct counter_desc ptp_ch_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, events) },
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, poll) },
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, arm) },
+ { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
+};
+
+static const struct counter_desc ptp_cq_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, cqe) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, err_cqe) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) },
+ { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, ooo_cqe_drop) },
+};
+
+static const struct counter_desc ptp_rq_stats_desc[] = {
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
+ { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
+};
+
+static const struct counter_desc qos_sq_stats_desc[] = {
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
+#endif
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) },
+ { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
+#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc)
+#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
+#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
+#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc)
+#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc)
+#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc)
+#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc)
+#define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc)
+#define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc)
+#define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc)
+#define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc)
+#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
+{
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb_max_qos_sqs);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos)
+{
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ u16 max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ int i, qid;
+
+ for (qid = 0; qid < max_qos_sqs; qid++)
+ for (i = 0; i < NUM_QOS_SQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ qos_sq_stats_desc[i].format, qid);
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos)
+{
+ struct mlx5e_sq_stats **stats;
+ u16 max_qos_sqs;
+ int i, qid;
+
+ /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+ max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs);
+ stats = READ_ONCE(priv->htb_qos_sq_stats);
+
+ for (qid = 0; qid < max_qos_sqs; qid++) {
+ struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]);
+
+ for (i = 0; i < NUM_QOS_SQ_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i);
+ }
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; }
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
+{
+ int num = NUM_PTP_CH_STATS;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return 0;
+
+ if (priv->tx_ptp_opened)
+ num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc;
+ if (priv->rx_ptp_opened)
+ num += NUM_PTP_RQ_STATS;
+
+ return num;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp)
+{
+ int i, tc;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return idx;
+
+ for (i = 0; i < NUM_PTP_CH_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ "%s", ptp_ch_stats_desc[i].format);
+
+ if (priv->tx_ptp_opened) {
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_SQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ptp_sq_stats_desc[i].format, tc);
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ptp_cq_stats_desc[i].format, tc);
+ }
+ if (priv->rx_ptp_opened) {
+ for (i = 0; i < NUM_PTP_RQ_STATS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ptp_rq_stats_desc[i].format, MLX5E_PTP_CHANNEL_IX);
+ }
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp)
+{
+ int i, tc;
+
+ if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+ return idx;
+
+ for (i = 0; i < NUM_PTP_CH_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch,
+ ptp_ch_stats_desc, i);
+
+ if (priv->tx_ptp_opened) {
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_SQ_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc],
+ ptp_sq_stats_desc, i);
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc],
+ ptp_cq_stats_desc, i);
+ }
+ if (priv->rx_ptp_opened) {
+ for (i = 0; i < NUM_PTP_RQ_STATS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq,
+ ptp_rq_stats_desc, i);
+ }
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ptp) { return; }
+
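+/* XSK RQ/SQ counters are counted (and their strings emitted) only once an
+ * XSK socket has ever been used, keeping the string and value arrays in sync.
+ */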
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels)
+{
+ int max_nch = priv->stats_nch;
+
+ return (NUM_RQ_STATS * max_nch) +
+ (NUM_CH_STATS * max_nch) +
+ (NUM_SQ_STATS * max_nch * priv->max_opened_tc) +
+ (NUM_RQ_XDPSQ_STATS * max_nch) +
+ (NUM_XDPSQ_STATS * max_nch) +
+ (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) +
+ (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels)
+{
+ bool is_xsk = priv->xsk.ever_used;
+ int max_nch = priv->stats_nch;
+ int i, j, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_CH_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ch_stats_desc[j].format, i);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_RQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ rq_stats_desc[j].format, i);
+ for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xskrq_stats_desc[j].format, i);
+ for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ rq_xdpsq_stats_desc[j].format, i);
+ }
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_SQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ sq_stats_desc[j].format,
+ i + tc * max_nch);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xsksq_stats_desc[j].format, i);
+ for (j = 0; j < NUM_XDPSQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xdpsq_stats_desc[j].format, i);
+ }
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels)
+{
+ bool is_xsk = priv->xsk.ever_used;
+ int max_nch = priv->stats_nch;
+ int i, j, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_CH_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch,
+ ch_stats_desc, j);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_RQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq,
+ rq_stats_desc, j);
+ for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq,
+ xskrq_stats_desc, j);
+ for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq,
+ rq_xdpsq_stats_desc, j);
+ }
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_SQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc],
+ sq_stats_desc, j);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq,
+ xsksq_stats_desc, j);
+ for (j = 0; j < NUM_XDPSQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq,
+ xdpsq_stats_desc, j);
+ }
+
+ return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(channels) { return; }
+
+MLX5E_DEFINE_STATS_GRP(sw, 0);
+MLX5E_DEFINE_STATS_GRP(qcnt, MLX5E_NDO_UPDATE_STATS);
+MLX5E_DEFINE_STATS_GRP(vnic_env, 0);
+MLX5E_DEFINE_STATS_GRP(vport, MLX5E_NDO_UPDATE_STATS);
+MLX5E_DEFINE_STATS_GRP(802_3, MLX5E_NDO_UPDATE_STATS);
+MLX5E_DEFINE_STATS_GRP(2863, 0);
+MLX5E_DEFINE_STATS_GRP(2819, 0);
+MLX5E_DEFINE_STATS_GRP(phy, 0);
+MLX5E_DEFINE_STATS_GRP(pcie, 0);
+MLX5E_DEFINE_STATS_GRP(per_prio, 0);
+MLX5E_DEFINE_STATS_GRP(pme, 0);
+MLX5E_DEFINE_STATS_GRP(channels, 0);
+MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0);
+MLX5E_DEFINE_STATS_GRP(eth_ext, 0);
+static MLX5E_DEFINE_STATS_GRP(tls, 0);
+MLX5E_DEFINE_STATS_GRP(ptp, 0);
+static MLX5E_DEFINE_STATS_GRP(qos, 0);
+
+/* The stats groups are listed in the reverse order of their update_stats() calls */
+mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
+ &MLX5E_STATS_GRP(sw),
+ &MLX5E_STATS_GRP(qcnt),
+ &MLX5E_STATS_GRP(vnic_env),
+ &MLX5E_STATS_GRP(vport),
+ &MLX5E_STATS_GRP(802_3),
+ &MLX5E_STATS_GRP(2863),
+ &MLX5E_STATS_GRP(2819),
+ &MLX5E_STATS_GRP(phy),
+ &MLX5E_STATS_GRP(eth_ext),
+ &MLX5E_STATS_GRP(pcie),
+ &MLX5E_STATS_GRP(per_prio),
+ &MLX5E_STATS_GRP(pme),
+#ifdef CONFIG_MLX5_EN_IPSEC
+ &MLX5E_STATS_GRP(ipsec_sw),
+#endif
+ &MLX5E_STATS_GRP(tls),
+ &MLX5E_STATS_GRP(channels),
+ &MLX5E_STATS_GRP(per_port_buff_congest),
+ &MLX5E_STATS_GRP(ptp),
+ &MLX5E_STATS_GRP(qos),
+#ifdef CONFIG_MLX5_EN_MACSEC
+ &MLX5E_STATS_GRP(macsec_hw),
+#endif
+};
+
+unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv)
+{
+ return ARRAY_SIZE(mlx5e_nic_stats_grps);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
new file mode 100644
index 000000000..52a67efaf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_EN_STATS_H__
+#define __MLX5_EN_STATS_H__
+
+#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \
+ (*(u64 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \
+ be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \
+ (*(u32 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
+ be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
+
+#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
+
+#define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
+
+#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
+
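+/* An ethtool counter descriptor: a format string (optionally containing a
+ * single printf placeholder for a queue/prio/tc index) and the byte offset
+ * of the counter within its stats structure or PPCNT/MPCNT register layout.
+ */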
+struct counter_desc {
+ char format[ETH_GSTRING_LEN];
+ size_t offset; /* Byte offset */
+};
+
+enum {
+ MLX5E_NDO_UPDATE_STATS = BIT(0x1),
+};
+
+struct mlx5e_priv;
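+/* A statistics group implements four ops: report how many counters it
+ * exposes, fill their ethtool strings, fill their values and refresh any
+ * cached firmware counters. fill_strings()/fill_stats() take and return a
+ * running index into the shared ethtool arrays.
+ */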
+struct mlx5e_stats_grp {
+ u16 update_stats_mask;
+ int (*get_num_stats)(struct mlx5e_priv *priv);
+ int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx);
+ int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx);
+ void (*update_stats)(struct mlx5e_priv *priv);
+};
+
+typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t;
+
+#define MLX5E_STATS_GRP_OP(grp, name) mlx5e_stats_grp_ ## grp ## _ ## name
+
+#define MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(grp) \
+ int MLX5E_STATS_GRP_OP(grp, num_stats)(struct mlx5e_priv *priv)
+
+#define MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(grp) \
+ void MLX5E_STATS_GRP_OP(grp, update_stats)(struct mlx5e_priv *priv)
+
+#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(grp) \
+ int MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx)
+
+#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(grp) \
+ int MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx)
+
+#define MLX5E_STATS_GRP(grp) mlx5e_stats_grp_ ## grp
+
+#define MLX5E_DECLARE_STATS_GRP(grp) \
+ const struct mlx5e_stats_grp MLX5E_STATS_GRP(grp)
+
+#define MLX5E_DEFINE_STATS_GRP(grp, mask) \
+MLX5E_DECLARE_STATS_GRP(grp) = { \
+ .get_num_stats = MLX5E_STATS_GRP_OP(grp, num_stats), \
+ .fill_stats = MLX5E_STATS_GRP_OP(grp, fill_stats), \
+ .fill_strings = MLX5E_STATS_GRP_OP(grp, fill_strings), \
+ .update_stats = MLX5E_STATS_GRP_OP(grp, update_stats), \
+ .update_stats_mask = mask, \
+}
+
+unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv);
+void mlx5e_stats_update(struct mlx5e_priv *priv);
+void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx);
+void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data);
+void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv);
+
+void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
+ struct ethtool_pause_stats *pause_stats);
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats);
+
+void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_phy_stats *phy_stats);
+void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_mac_stats *mac_stats);
+void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv,
+ struct ethtool_eth_ctrl_stats *ctrl_stats);
+void mlx5e_stats_rmon_get(struct mlx5e_priv *priv,
+ struct ethtool_rmon_stats *rmon,
+ const struct ethtool_rmon_hist_range **ranges);
+
+/* Concrete NIC Stats */
+
+struct mlx5e_sw_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_tso_packets;
+ u64 tx_tso_bytes;
+ u64 tx_tso_inner_packets;
+ u64 tx_tso_inner_bytes;
+ u64 tx_added_vlan_packets;
+ u64 tx_nop;
+ u64 tx_mpwqe_blks;
+ u64 tx_mpwqe_pkts;
+ u64 rx_lro_packets;
+ u64 rx_lro_bytes;
+ u64 rx_gro_packets;
+ u64 rx_gro_bytes;
+ u64 rx_gro_skbs;
+ u64 rx_gro_match_packets;
+ u64 rx_gro_large_hds;
+ u64 rx_mcast_packets;
+ u64 rx_ecn_mark;
+ u64 rx_removed_vlan_packets;
+ u64 rx_csum_unnecessary;
+ u64 rx_csum_none;
+ u64 rx_csum_complete;
+ u64 rx_csum_complete_tail;
+ u64 rx_csum_complete_tail_slow;
+ u64 rx_csum_unnecessary_inner;
+ u64 rx_xdp_drop;
+ u64 rx_xdp_redirect;
+ u64 rx_xdp_tx_xmit;
+ u64 rx_xdp_tx_mpwqe;
+ u64 rx_xdp_tx_inlnw;
+ u64 rx_xdp_tx_nops;
+ u64 rx_xdp_tx_full;
+ u64 rx_xdp_tx_err;
+ u64 rx_xdp_tx_cqe;
+ u64 tx_csum_none;
+ u64 tx_csum_partial;
+ u64 tx_csum_partial_inner;
+ u64 tx_queue_stopped;
+ u64 tx_queue_dropped;
+ u64 tx_xmit_more;
+ u64 tx_recover;
+ u64 tx_cqes;
+ u64 tx_queue_wake;
+ u64 tx_cqe_err;
+ u64 tx_xdp_xmit;
+ u64 tx_xdp_mpwqe;
+ u64 tx_xdp_inlnw;
+ u64 tx_xdp_nops;
+ u64 tx_xdp_full;
+ u64 tx_xdp_err;
+ u64 tx_xdp_cqes;
+ u64 rx_wqe_err;
+ u64 rx_mpwqe_filler_cqes;
+ u64 rx_mpwqe_filler_strides;
+ u64 rx_oversize_pkts_sw_drop;
+ u64 rx_buff_alloc_err;
+ u64 rx_cqe_compress_blks;
+ u64 rx_cqe_compress_pkts;
+ u64 rx_cache_reuse;
+ u64 rx_cache_full;
+ u64 rx_cache_empty;
+ u64 rx_cache_busy;
+ u64 rx_cache_waive;
+ u64 rx_congst_umr;
+ u64 rx_arfs_err;
+ u64 rx_recover;
+ u64 ch_events;
+ u64 ch_poll;
+ u64 ch_arm;
+ u64 ch_aff_change;
+ u64 ch_force_irq;
+ u64 ch_eq_rearm;
+#ifdef CONFIG_PAGE_POOL_STATS
+ u64 rx_pp_alloc_fast;
+ u64 rx_pp_alloc_slow;
+ u64 rx_pp_alloc_slow_high_order;
+ u64 rx_pp_alloc_empty;
+ u64 rx_pp_alloc_refill;
+ u64 rx_pp_alloc_waive;
+ u64 rx_pp_recycle_cached;
+ u64 rx_pp_recycle_cache_full;
+ u64 rx_pp_recycle_ring;
+ u64 rx_pp_recycle_ring_full;
+ u64 rx_pp_recycle_released_ref;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tx_tls_encrypted_packets;
+ u64 tx_tls_encrypted_bytes;
+ u64 tx_tls_ooo;
+ u64 tx_tls_dump_packets;
+ u64 tx_tls_dump_bytes;
+ u64 tx_tls_resync_bytes;
+ u64 tx_tls_skip_no_sync_data;
+ u64 tx_tls_drop_no_sync_data;
+ u64 tx_tls_drop_bypass_req;
+
+ u64 rx_tls_decrypted_packets;
+ u64 rx_tls_decrypted_bytes;
+ u64 rx_tls_resync_req_pkt;
+ u64 rx_tls_resync_req_start;
+ u64 rx_tls_resync_req_end;
+ u64 rx_tls_resync_req_skip;
+ u64 rx_tls_resync_res_ok;
+ u64 rx_tls_resync_res_retry;
+ u64 rx_tls_resync_res_skip;
+ u64 rx_tls_err;
+#endif
+
+ u64 rx_xsk_packets;
+ u64 rx_xsk_bytes;
+ u64 rx_xsk_csum_complete;
+ u64 rx_xsk_csum_unnecessary;
+ u64 rx_xsk_csum_unnecessary_inner;
+ u64 rx_xsk_csum_none;
+ u64 rx_xsk_ecn_mark;
+ u64 rx_xsk_removed_vlan_packets;
+ u64 rx_xsk_xdp_drop;
+ u64 rx_xsk_xdp_redirect;
+ u64 rx_xsk_wqe_err;
+ u64 rx_xsk_mpwqe_filler_cqes;
+ u64 rx_xsk_mpwqe_filler_strides;
+ u64 rx_xsk_oversize_pkts_sw_drop;
+ u64 rx_xsk_buff_alloc_err;
+ u64 rx_xsk_cqe_compress_blks;
+ u64 rx_xsk_cqe_compress_pkts;
+ u64 rx_xsk_congst_umr;
+ u64 rx_xsk_arfs_err;
+ u64 tx_xsk_xmit;
+ u64 tx_xsk_mpwqe;
+ u64 tx_xsk_inlnw;
+ u64 tx_xsk_full;
+ u64 tx_xsk_err;
+ u64 tx_xsk_cqes;
+};
+
+struct mlx5e_qcounter_stats {
+ u32 rx_out_of_buffer;
+ u32 rx_if_down_packets;
+};
+
+#define VNIC_ENV_GET(vnic_env_stats, c) \
+ MLX5_GET(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \
+ vport_env.c)
+
+struct mlx5e_vnic_env_stats {
+ __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
+};
+
+#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \
+ vstats->query_vport_out, c)
+
+struct mlx5e_vport_stats {
+ __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)];
+};
+
+#define PPORT_802_3_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \
+ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+#define PPORT_2863_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \
+ counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+#define PPORT_2819_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \
+ counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+#define PPORT_PHY_STATISTICAL_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \
+ counter_set.phys_layer_statistical_cntrs.c##_high)
+#define PPORT_PER_PRIO_GET(pstats, prio, c) \
+ MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \
+ counter_set.eth_per_prio_grp_data_layout.c##_high)
+#define NUM_PPORT_PRIO 8
+#define PPORT_ETH_EXT_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+
+struct mlx5e_pport_stats {
+ __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
+};
+
+#define PCIE_PERF_GET(pcie_stats, c) \
+ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
+ counter_set.pcie_perf_cntrs_grp_data_layout.c)
+
+#define PCIE_PERF_GET64(pcie_stats, c) \
+ MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
+ counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+
+struct mlx5e_pcie_stats {
+ __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
+};
+
+struct mlx5e_rq_stats {
+ u64 packets;
+ u64 bytes;
+ u64 csum_complete;
+ u64 csum_complete_tail;
+ u64 csum_complete_tail_slow;
+ u64 csum_unnecessary;
+ u64 csum_unnecessary_inner;
+ u64 csum_none;
+ u64 lro_packets;
+ u64 lro_bytes;
+ u64 gro_packets;
+ u64 gro_bytes;
+ u64 gro_skbs;
+ u64 gro_match_packets;
+ u64 gro_large_hds;
+ u64 mcast_packets;
+ u64 ecn_mark;
+ u64 removed_vlan_packets;
+ u64 xdp_drop;
+ u64 xdp_redirect;
+ u64 wqe_err;
+ u64 mpwqe_filler_cqes;
+ u64 mpwqe_filler_strides;
+ u64 oversize_pkts_sw_drop;
+ u64 buff_alloc_err;
+ u64 cqe_compress_blks;
+ u64 cqe_compress_pkts;
+ u64 cache_reuse;
+ u64 cache_full;
+ u64 cache_empty;
+ u64 cache_busy;
+ u64 cache_waive;
+ u64 congst_umr;
+ u64 arfs_err;
+ u64 recover;
+#ifdef CONFIG_PAGE_POOL_STATS
+ u64 pp_alloc_fast;
+ u64 pp_alloc_slow;
+ u64 pp_alloc_slow_high_order;
+ u64 pp_alloc_empty;
+ u64 pp_alloc_refill;
+ u64 pp_alloc_waive;
+ u64 pp_recycle_cached;
+ u64 pp_recycle_cache_full;
+ u64 pp_recycle_ring;
+ u64 pp_recycle_ring_full;
+ u64 pp_recycle_released_ref;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_decrypted_packets;
+ u64 tls_decrypted_bytes;
+ u64 tls_resync_req_pkt;
+ u64 tls_resync_req_start;
+ u64 tls_resync_req_end;
+ u64 tls_resync_req_skip;
+ u64 tls_resync_res_ok;
+ u64 tls_resync_res_retry;
+ u64 tls_resync_res_skip;
+ u64 tls_err;
+#endif
+};
+
+struct mlx5e_sq_stats {
+ /* commonly accessed in data path */
+ u64 packets;
+ u64 bytes;
+ u64 xmit_more;
+ u64 tso_packets;
+ u64 tso_bytes;
+ u64 tso_inner_packets;
+ u64 tso_inner_bytes;
+ u64 csum_partial;
+ u64 csum_partial_inner;
+ u64 added_vlan_packets;
+ u64 nop;
+ u64 mpwqe_blks;
+ u64 mpwqe_pkts;
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_encrypted_packets;
+ u64 tls_encrypted_bytes;
+ u64 tls_ooo;
+ u64 tls_dump_packets;
+ u64 tls_dump_bytes;
+ u64 tls_resync_bytes;
+ u64 tls_skip_no_sync_data;
+ u64 tls_drop_no_sync_data;
+ u64 tls_drop_bypass_req;
+#endif
+ /* less likely accessed in data path */
+ u64 csum_none;
+ u64 stopped;
+ u64 dropped;
+ u64 recover;
+ /* dirtied @completion */
+ u64 cqes ____cacheline_aligned_in_smp;
+ u64 wake;
+ u64 cqe_err;
+};
+
+struct mlx5e_xdpsq_stats {
+ u64 xmit;
+ u64 mpwqe;
+ u64 inlnw;
+ u64 nops;
+ u64 full;
+ u64 err;
+ /* dirtied @completion */
+ u64 cqes ____cacheline_aligned_in_smp;
+};
+
+struct mlx5e_ch_stats {
+ u64 events;
+ u64 poll;
+ u64 arm;
+ u64 aff_change;
+ u64 force_irq;
+ u64 eq_rearm;
+};
+
+struct mlx5e_ptp_cq_stats {
+ u64 cqe;
+ u64 err_cqe;
+ u64 abort;
+ u64 abort_abs_diff_ns;
+ u64 resync_cqe;
+ u64 resync_event;
+ u64 ooo_cqe_drop;
+};
+
+struct mlx5e_stats {
+ struct mlx5e_sw_stats sw;
+ struct mlx5e_qcounter_stats qcnt;
+ struct mlx5e_vnic_env_stats vnic;
+ struct mlx5e_vport_stats vport;
+ struct mlx5e_pport_stats pport;
+ struct rtnl_link_stats64 vf_vport;
+ struct mlx5e_pcie_stats pcie;
+};
+
+extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[];
+unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv);
+
+extern MLX5E_DECLARE_STATS_GRP(sw);
+extern MLX5E_DECLARE_STATS_GRP(qcnt);
+extern MLX5E_DECLARE_STATS_GRP(vnic_env);
+extern MLX5E_DECLARE_STATS_GRP(vport);
+extern MLX5E_DECLARE_STATS_GRP(802_3);
+extern MLX5E_DECLARE_STATS_GRP(2863);
+extern MLX5E_DECLARE_STATS_GRP(2819);
+extern MLX5E_DECLARE_STATS_GRP(phy);
+extern MLX5E_DECLARE_STATS_GRP(eth_ext);
+extern MLX5E_DECLARE_STATS_GRP(pcie);
+extern MLX5E_DECLARE_STATS_GRP(per_prio);
+extern MLX5E_DECLARE_STATS_GRP(pme);
+extern MLX5E_DECLARE_STATS_GRP(channels);
+extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest);
+extern MLX5E_DECLARE_STATS_GRP(ipsec_sw);
+extern MLX5E_DECLARE_STATS_GRP(ptp);
+extern MLX5E_DECLARE_STATS_GRP(macsec_hw);
+
+#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
new file mode 100644
index 000000000..43239555f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -0,0 +1,5307 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/flow_dissector.h>
+#include <net/flow_offload.h>
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/device.h>
+#include <linux/rhashtable.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
+#include <net/arp.h>
+#include <net/ipv6_stubs.h>
+#include <net/bareudp.h>
+#include <net/bonding.h>
+#include "en.h"
+#include "en/tc/post_act.h"
+#include "en_rep.h"
+#include "en/rep/tc.h"
+#include "en/rep/neigh.h"
+#include "en_tc.h"
+#include "eswitch.h"
+#include "fs_core.h"
+#include "en/port.h"
+#include "en/tc_tun.h"
+#include "en/mapping.h"
+#include "en/tc_ct.h"
+#include "en/mod_hdr.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc/sample.h"
+#include "en/tc/act/act.h"
+#include "en/tc/post_meter.h"
+#include "lib/devcom.h"
+#include "lib/geneve.h"
+#include "lib/fs_chains.h"
+#include "diag/en_tc_tracepoint.h"
+#include <asm/div64.h>
+#include "lag/lag.h"
+#include "lag/mp.h"
+
+#define MLX5E_TC_TABLE_NUM_GROUPS 4
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
+
+struct mlx5e_tc_table {
+ /* Protects the dynamic assignment of the t parameter,
+ * which is the nic tc root table.
+ */
+ struct mutex t_lock;
+ struct mlx5e_priv *priv;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_table *miss_t;
+ struct mlx5_fs_chains *chains;
+ struct mlx5e_post_act *post_act;
+
+ struct rhashtable ht;
+
+ struct mod_hdr_tbl mod_hdr;
+ struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
+ DECLARE_HASHTABLE(hairpin_tbl, 8);
+
+ struct notifier_block netdevice_nb;
+ struct netdev_net_notifier netdevice_nn;
+
+ struct mlx5_tc_ct_priv *ct;
+ struct mapping_ctx *mapping;
+};
+
+struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
+ [CHAIN_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+ .moffset = 0,
+ .mlen = 16,
+ },
+ [VPORT_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+ .moffset = 16,
+ .mlen = 16,
+ },
+ [TUNNEL_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
+ .moffset = 8,
+ .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
+ .soffset = MLX5_BYTE_OFF(fte_match_param,
+ misc_parameters_2.metadata_reg_c_1),
+ },
+ [ZONE_TO_REG] = zone_to_reg_ct,
+ [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
+ [CTSTATE_TO_REG] = ctstate_to_reg_ct,
+ [MARK_TO_REG] = mark_to_reg_ct,
+ [LABELS_TO_REG] = labels_to_reg_ct,
+ [FTEID_TO_REG] = fteid_to_reg_ct,
+ /* For NIC rules we store the restore metadata directly
+ * in reg_b, which is passed to SW, since we don't
+ * jump between steering domains.
+ */
+ [NIC_CHAIN_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
+ .moffset = 0,
+ .mlen = 16,
+ },
+ [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
+ [PACKET_COLOR_TO_REG] = packet_color_to_reg,
+};
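+
+/* Reading the table above: on the FDB path reg_c_0 is shared between
+ * CHAIN_TO_REG (moffset 0, 16 bits, i.e. bits 0..15) and VPORT_TO_REG
+ * (moffset 16, 16 bits, i.e. bits 16..31), so chain-restore data and
+ * source-vport metadata can coexist in one register, while NIC rules keep
+ * their restore data in reg_b, as noted for NIC_CHAIN_TO_REG.
+ */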
+
+struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
+{
+ struct mlx5e_tc_table *tc;
+
+ tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
+ return tc ? tc : ERR_PTR(-ENOMEM);
+}
+
+void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
+{
+ kvfree(tc);
+}
+
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
+{
+ return tc->chains;
+}
+
+/* To avoid a false lock dependency warning, set the tc_ht lock class to be
+ * different from the lock class of the ht being used. When deleting the last
+ * flow from a group and then deleting the group, we get into
+ * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash;
+ * that takes ht->mutex, but it is a different ht->mutex than the one here.
+ */
+static struct lock_class_key tc_ht_lock_key;
+static struct lock_class_key tc_ht_wq_key;
+
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
+static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
+
+void
+mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 val,
+ u32 mask)
+{
+ void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ u32 max_mask = GENMASK(match_len - 1, 0);
+ __be32 curr_mask_be, curr_val_be;
+ u32 curr_mask, curr_val;
+
+ fmask = headers_c + soffset;
+ fval = headers_v + soffset;
+
+ memcpy(&curr_mask_be, fmask, 4);
+ memcpy(&curr_val_be, fval, 4);
+
+ curr_mask = be32_to_cpu(curr_mask_be);
+ curr_val = be32_to_cpu(curr_val_be);
+
+ /* move to the correct offset */
+ WARN_ON(mask > max_mask);
+ mask <<= moffset;
+ val <<= moffset;
+ max_mask <<= moffset;
+
+ /* zero val and mask */
+ curr_mask &= ~max_mask;
+ curr_val &= ~max_mask;
+
+ /* OR the new val and mask into the current ones */
+ curr_mask |= mask;
+ curr_val |= val;
+
+ /* back to be32 and write */
+ curr_mask_be = cpu_to_be32(curr_mask);
+ curr_val_be = cpu_to_be32(curr_val);
+
+ memcpy(fmask, &curr_mask_be, 4);
+ memcpy(fval, &curr_val_be, 4);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+}
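+
+/* Worked example for the helper above (values are illustrative only): for a
+ * mapping with moffset 16 and mlen 16, val 0x2a and mask 0xffff, max_mask
+ * starts as 0xffff and becomes 0xffff0000 after the shift, so only bits
+ * 16..31 of the current match dword are cleared and then OR-ed with
+ * 0x002a0000; the low 16 bits of any existing match are preserved.
+ */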
+
+void
+mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 *val,
+ u32 *mask)
+{
+ void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
+ int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ u32 max_mask = GENMASK(match_len - 1, 0);
+ __be32 curr_mask_be, curr_val_be;
+ u32 curr_mask, curr_val;
+
+ fmask = headers_c + soffset;
+ fval = headers_v + soffset;
+
+ memcpy(&curr_mask_be, fmask, 4);
+ memcpy(&curr_val_be, fval, 4);
+
+ curr_mask = be32_to_cpu(curr_mask_be);
+ curr_val = be32_to_cpu(curr_val_be);
+
+ *mask = (curr_mask >> moffset) & max_mask;
+ *val = (curr_val >> moffset) & max_mask;
+}
+
+int
+mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
+{
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ char *modact;
+ int err;
+
+ modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
+
+ /* Firmware has a 5-bit length field, and 0 means 32 bits */
+ if (mlen == 32)
+ mlen = 0;
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field, mfield);
+ MLX5_SET(set_action_in, modact, offset, moffset);
+ MLX5_SET(set_action_in, modact, length, mlen);
+ MLX5_SET(set_action_in, modact, data, data);
+ err = mod_hdr_acts->num_actions;
+ mod_hdr_acts->num_actions++;
+
+ return err;
+}
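+
+/* Usage sketch for the helper above: the returned value is the index of the
+ * newly appended SET action (0 for the first action on an empty
+ * mod_hdr_acts, 1 for the next, and so on), which callers can later pass as
+ * act_id to mlx5e_tc_match_to_reg_mod_hdr_change() to rewrite the data.
+ * Note that a 32-bit mlen is encoded as length 0 to fit the firmware's
+ * 5-bit length field.
+ */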
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->int_port_priv;
+ }
+
+ return NULL;
+}
+
+struct mlx5e_flow_meters *
+mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_priv *priv;
+
+ if (is_mdev_switchdev_mode(dev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ priv = netdev_priv(uplink_rpriv->netdev);
+ if (!uplink_priv->flow_meters)
+ uplink_priv->flow_meters =
+ mlx5e_flow_meters_init(priv,
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
+ if (!IS_ERR(uplink_priv->flow_meters))
+ return uplink_priv->flow_meters;
+ }
+
+ return NULL;
+}
+
+static struct mlx5_tc_ct_priv *
+get_ct_priv(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->ct_priv;
+ }
+
+ return tc->ct;
+}
+
+static struct mlx5e_tc_psample *
+get_sample_priv(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->tc_psample;
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_post_act *
+get_post_action(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->post_act;
+ }
+
+ return tc->post_act;
+}
+
+struct mlx5_flow_handle *
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (is_mdev_switchdev_mode(priv->mdev))
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
+}
+
+void
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+ return;
+ }
+
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
+}
+
+static bool
+is_flow_meter_action(struct mlx5_flow_attr *attr)
+{
+ return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
+ (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER));
+}
+
+static int
+mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_post_act *post_act = get_post_action(priv);
+ struct mlx5e_post_meter_priv *post_meter;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
+ if (IS_ERR(meter)) {
+ mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
+ return PTR_ERR(meter);
+ }
+
+ ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
+ post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter,
+ meter->red_counter);
+ if (IS_ERR(post_meter)) {
+ mlx5_core_err(priv->mdev, "Failed to init post meter\n");
+ goto err_meter_init;
+ }
+
+ attr->meter_attr.meter = meter;
+ attr->meter_attr.post_meter = post_meter;
+ attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+
+err_meter_init:
+ mlx5e_tc_meter_put(meter);
+ return PTR_ERR(post_meter);
+}
+
+static void
+mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr)
+{
+ mlx5e_post_meter_cleanup(attr->meter_attr.post_meter);
+ mlx5e_tc_meter_put(attr->meter_attr.meter);
+}
+
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
+ &attr->parse_attr->mod_hdr_acts;
+
+ return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
+ spec, attr,
+ mod_hdr_acts);
+ }
+
+ if (!is_mdev_switchdev_mode(priv->mdev))
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
+ return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
+
+ if (is_flow_meter_action(attr)) {
+ err = mlx5e_tc_add_flow_meter(priv, attr);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+}
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT) {
+ mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
+ return;
+ }
+
+ if (!is_mdev_switchdev_mode(priv->mdev)) {
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
+ return;
+ }
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
+ mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
+ return;
+ }
+
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+
+ if (attr->meter_attr.meter)
+ mlx5e_tc_del_flow_meter(attr);
+}
+
+int
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
+{
+ int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
+
+ return ret < 0 ? ret : 0;
+}
+
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5e_tc_attr_to_reg type,
+ int act_id, u32 data)
+{
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ char *modact;
+
+ modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
+
+ /* Firmware has a 5-bit length field, and 0 means 32 bits */
+ if (mlen == 32)
+ mlen = 0;
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field, mfield);
+ MLX5_SET(set_action_in, modact, offset, moffset);
+ MLX5_SET(set_action_in, modact, length, mlen);
+ MLX5_SET(set_action_in, modact, data, data);
+}
+
+struct mlx5e_hairpin {
+ struct mlx5_hairpin *pair;
+
+ struct mlx5_core_dev *func_mdev;
+ struct mlx5e_priv *func_priv;
+ u32 tdn;
+ struct mlx5e_tir direct_tir;
+
+ int num_channels;
+ struct mlx5e_rqt indir_rqt;
+ struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5_ttc_table *ttc;
+};
+
+struct mlx5e_hairpin_entry {
+ /* a node of a hash table which keeps all the hairpin entries */
+ struct hlist_node hairpin_hlist;
+
+ /* protects flows list */
+ spinlock_t flows_lock;
+ /* flows sharing the same hairpin */
+ struct list_head flows;
+ /* hpe's that were not fully initialized when the dead peer update
+ * event function traversed them.
+ */
+ struct list_head dead_peer_wait_list;
+
+ u16 peer_vhca_id;
+ u8 prio;
+ struct mlx5e_hairpin *hp;
+ refcount_t refcnt;
+ struct completion res_ready;
+};
+
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+{
+ if (!flow || !refcount_inc_not_zero(&flow->refcnt))
+ return ERR_PTR(-EINVAL);
+ return flow;
+}
+
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
+{
+ if (refcount_dec_and_test(&flow->refcnt)) {
+ mlx5e_tc_del_flow(priv, flow);
+ kfree_rcu(flow, rcu_head);
+ }
+}
+
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, ESWITCH);
+}
+
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, FT);
+}
+
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, OFFLOADED);
+}
+
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
+{
+ return mlx5e_is_eswitch_flow(flow) ?
+ MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+}
+
+static struct mod_hdr_tbl *
+get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
+ &esw->offloads.mod_hdr :
+ &tc->mod_hdr;
+}
+
+static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5e_mod_hdr_handle *mh;
+
+ mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
+ mlx5e_get_flow_namespace(flow),
+ &parse_attr->mod_hdr_acts);
+ if (IS_ERR(mh))
+ return PTR_ERR(mh);
+
+ modify_hdr = mlx5e_mod_hdr_get(mh);
+ flow->attr->modify_hdr = modify_hdr;
+ flow->mh = mh;
+
+ return 0;
+}
+
+static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ /* flow wasn't fully initialized */
+ if (!flow->mh)
+ return;
+
+ mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
+ flow->mh);
+ flow->mh = NULL;
+}
+
+static
+struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
+{
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+
+ netdev = dev_get_by_index(net, ifindex);
+ if (!netdev)
+ return ERR_PTR(-ENODEV);
+
+ priv = netdev_priv(netdev);
+ mdev = priv->mdev;
+ dev_put(netdev);
+
+ /* Mirred tc action holds a refcount on the ifindex net_device (see
+ * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
+ * after dev_put(netdev), while we're in the context of adding a tc flow.
+ *
+ * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
+ * stored in a hairpin object, which exists until all flows that refer to it are
+ * removed.
+ *
+ * On the other hand, after a hairpin object has been created, the peer net_device may
+ * be removed/unbound while there are still some hairpin flows using it. This case is
+ * handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to the
+ * NETDEV_UNREGISTER event of the peer net_device.
+ */
+ return mdev;
+}
+
+static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
+ if (err)
+ goto out;
+
+ mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
+ err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
+ if (err)
+ goto create_tir_err;
+
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
+
+create_tir_err:
+ mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
+
+ goto out;
+}
+
+static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
+{
+ mlx5e_tir_destroy(&hp->direct_tir);
+ mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
+}
+
+static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_rss_params_indir *indir;
+ int err;
+
+ indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
+ if (!indir)
+ return -ENOMEM;
+
+ mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
+ err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
+ mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
+ indir);
+
+ kvfree(indir);
+ return err;
+}
+
+static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+ struct mlx5e_rss_params_hash rss_hash;
+ enum mlx5_traffic_types tt, max_tt;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_rss_params_traffic_type rss_tt;
+
+ rss_tt = mlx5e_rss_get_default_tt_config(tt);
+
+ mlx5e_tir_builder_build_rqt(builder, hp->tdn,
+ mlx5e_rqt_get_rqtn(&hp->indir_rqt),
+ false);
+ mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
+
+ err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
+ if (err) {
+ mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
+ goto err_destroy_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
+
+err_destroy_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_tir_destroy(&hp->indir_tir[tt]);
+
+ goto out;
+}
+
+static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5e_tir_destroy(&hp->indir_tir[tt]);
+}
+
+static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
+
+ memset(ttc_params, 0, sizeof(*ttc_params));
+
+ ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_tir_get_tirn(&hp->direct_tir) :
+ mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+ }
+
+ ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_TC_PRIO;
+}
+
+static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+ struct ttc_params ttc_params;
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ err = mlx5e_hairpin_create_indirect_rqt(hp);
+ if (err)
+ return err;
+
+ err = mlx5e_hairpin_create_indirect_tirs(hp);
+ if (err)
+ goto err_create_indirect_tirs;
+
+ mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
+ hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(hp->ttc)) {
+ err = PTR_ERR(hp->ttc);
+ goto err_create_ttc_table;
+ }
+
+ ttc = mlx5e_fs_get_ttc(priv->fs, false);
+ netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
+ hp->num_channels,
+ mlx5_get_ttc_flow_table(ttc)->id);
+
+ return 0;
+
+err_create_ttc_table:
+ mlx5e_hairpin_destroy_indirect_tirs(hp);
+err_create_indirect_tirs:
+ mlx5e_rqt_destroy(&hp->indir_rqt);
+
+ return err;
+}
+
+static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
+{
+ mlx5_destroy_ttc_table(hp->ttc);
+ mlx5e_hairpin_destroy_indirect_tirs(hp);
+ mlx5e_rqt_destroy(&hp->indir_rqt);
+}
+
+static struct mlx5e_hairpin *
+mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
+ int peer_ifindex)
+{
+ struct mlx5_core_dev *func_mdev, *peer_mdev;
+ struct mlx5e_hairpin *hp;
+ struct mlx5_hairpin *pair;
+ int err;
+
+ hp = kzalloc(sizeof(*hp), GFP_KERNEL);
+ if (!hp)
+ return ERR_PTR(-ENOMEM);
+
+ func_mdev = priv->mdev;
+ peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (IS_ERR(peer_mdev)) {
+ err = PTR_ERR(peer_mdev);
+ goto create_pair_err;
+ }
+
+ pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
+ if (IS_ERR(pair)) {
+ err = PTR_ERR(pair);
+ goto create_pair_err;
+ }
+ hp->pair = pair;
+ hp->func_mdev = func_mdev;
+ hp->func_priv = priv;
+ hp->num_channels = params->num_channels;
+
+ err = mlx5e_hairpin_create_transport(hp);
+ if (err)
+ goto create_transport_err;
+
+ if (hp->num_channels > 1) {
+ err = mlx5e_hairpin_rss_init(hp);
+ if (err)
+ goto rss_init_err;
+ }
+
+ return hp;
+
+rss_init_err:
+ mlx5e_hairpin_destroy_transport(hp);
+create_transport_err:
+ mlx5_core_hairpin_destroy(hp->pair);
+create_pair_err:
+ kfree(hp);
+ return ERR_PTR(err);
+}
+
+static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
+{
+ if (hp->num_channels > 1)
+ mlx5e_hairpin_rss_cleanup(hp);
+ mlx5e_hairpin_destroy_transport(hp);
+ mlx5_core_hairpin_destroy(hp->pair);
+ kvfree(hp);
+}
+
+static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
+{
+ return (peer_vhca_id << 16 | prio);
+}
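+
+/* e.g. peer_vhca_id 0x12 and prio 3 hash to key 0x00120003, so flows towards
+ * the same peer and priority land in the same hairpin_tbl bucket and can
+ * share one hairpin pair.
+ */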
+
+static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
+ u16 peer_vhca_id, u8 prio)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5e_hairpin_entry *hpe;
+ u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
+
+ hash_for_each_possible(tc->hairpin_tbl, hpe,
+ hairpin_hlist, hash_key) {
+ if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
+ refcount_inc(&hpe->refcnt);
+ return hpe;
+ }
+ }
+
+ return NULL;
+}
+
+static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
+ struct mlx5e_hairpin_entry *hpe)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ /* no more hairpin flows for us, release the hairpin pair */
+ if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
+ return;
+ hash_del(&hpe->hairpin_hlist);
+ mutex_unlock(&tc->hairpin_tbl_lock);
+
+ if (!IS_ERR_OR_NULL(hpe->hp)) {
+ netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
+ dev_name(hpe->hp->pair->peer_mdev->device));
+
+ mlx5e_hairpin_destroy(hpe->hp);
+ }
+
+ WARN_ON(!list_empty(&hpe->flows));
+ kfree(hpe);
+}
+
+#define UNKNOWN_MATCH_PRIO 8
+
+static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec, u8 *match_prio,
+ struct netlink_ext_ack *extack)
+{
+ void *headers_c, *headers_v;
+ u8 prio_val, prio_mask = 0;
+ bool vlan_present;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "only PCP trust state supported for hairpin");
+ return -EOPNOTSUPP;
+ }
+#endif
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+
+ vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
+ if (vlan_present) {
+ prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ }
+
+ if (!vlan_present || !prio_mask) {
+ prio_val = UNKNOWN_MATCH_PRIO;
+ } else if (prio_mask != 0x7) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "masked priority match not supported for hairpin");
+ return -EOPNOTSUPP;
+ }
+
+ *match_prio = prio_val;
+ return 0;
+}
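+
+/* Example of the resulting match_prio: a rule that does not match on a VLAN
+ * tag (or matches one without a priority mask) gets UNKNOWN_MATCH_PRIO (8),
+ * a fully masked PCP match (mask 0x7) yields the matched PCP value 0..7, and
+ * any partially masked priority is rejected with -EOPNOTSUPP.
+ */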
+
+static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ int peer_ifindex = parse_attr->mirred_ifindex[0];
+ struct mlx5_hairpin_params params;
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5e_hairpin_entry *hpe;
+ struct mlx5e_hairpin *hp;
+ u64 link_speed64;
+ u32 link_speed;
+ u8 match_prio;
+ u16 peer_id;
+ int err;
+
+ peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (IS_ERR(peer_mdev)) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
+ return PTR_ERR(peer_mdev);
+ }
+
+ if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
+ NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
+ err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
+ extack);
+ if (err)
+ return err;
+
+ mutex_lock(&tc->hairpin_tbl_lock);
+ hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
+ if (hpe) {
+ mutex_unlock(&tc->hairpin_tbl_lock);
+ wait_for_completion(&hpe->res_ready);
+
+ if (IS_ERR(hpe->hp)) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
+ goto attach_flow;
+ }
+
+ hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
+ if (!hpe) {
+ mutex_unlock(&tc->hairpin_tbl_lock);
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&hpe->flows_lock);
+ INIT_LIST_HEAD(&hpe->flows);
+ INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
+ hpe->peer_vhca_id = peer_id;
+ hpe->prio = match_prio;
+ refcount_set(&hpe->refcnt, 1);
+ init_completion(&hpe->res_ready);
+
+ hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
+ hash_hairpin_info(peer_id, match_prio));
+ mutex_unlock(&tc->hairpin_tbl_lock);
+
+ params.log_data_size = 16;
+ params.log_data_size = min_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
+ params.log_data_size = max_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
+
+ params.log_num_packets = params.log_data_size -
+ MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
+ params.log_num_packets = min_t(u8, params.log_num_packets,
+ MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
+
+ params.q_counter = priv->q_counter;
+ /* set one hairpin pair for each 50 Gbps share of the link */
+ mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
+ link_speed = max_t(u32, link_speed, 50000);
+ link_speed64 = link_speed;
+ do_div(link_speed64, 50000);
+ params.num_channels = link_speed64;
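+ /* e.g. a 100 Gbps port gives num_channels = 2, while a 25 Gbps port is
+ * clamped to the 50000 floor and gets a single channel.
+ */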
+
+ hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
+ hpe->hp = hp;
+ complete_all(&hpe->res_ready);
+ if (IS_ERR(hp)) {
+ err = PTR_ERR(hp);
+ goto out_err;
+ }
+
+ netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
+ mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
+ dev_name(hp->pair->peer_mdev->device),
+ hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
+
+attach_flow:
+ if (hpe->hp->num_channels > 1) {
+ flow_flag_set(flow, HAIRPIN_RSS);
+ flow->attr->nic_attr->hairpin_ft =
+ mlx5_get_ttc_flow_table(hpe->hp->ttc);
+ } else {
+ flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
+ }
+
+ flow->hpe = hpe;
+ spin_lock(&hpe->flows_lock);
+ list_add(&flow->hairpin, &hpe->flows);
+ spin_unlock(&hpe->flows_lock);
+
+ return 0;
+
+out_err:
+ mlx5e_hairpin_put(priv, hpe);
+ return err;
+}
+
+static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ /* flow wasn't fully initialized */
+ if (!flow->hpe)
+ return;
+
+ spin_lock(&flow->hpe->flows_lock);
+ list_del(&flow->hairpin);
+ spin_unlock(&flow->hpe->flows_lock);
+
+ mlx5e_hairpin_put(priv, flow->hpe);
+ flow->hpe = NULL;
+}
+
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_fs_chains *nic_chains;
+ struct mlx5_flow_act flow_act = {
+ .action = attr->action,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_table *ft;
+ int dest_ix = 0;
+
+ nic_chains = mlx5e_nic_chains(tc);
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = nic_attr->flow_tag;
+
+ if (attr->dest_ft) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = attr->dest_ft;
+ dest_ix++;
+ } else if (nic_attr->hairpin_ft) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = nic_attr->hairpin_ft;
+ dest_ix++;
+ } else if (nic_attr->hairpin_tirn) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
+ dest_ix++;
+ } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ if (attr->dest_chain) {
+ dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
+ attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+ if (IS_ERR(dest[dest_ix].ft))
+ return ERR_CAST(dest[dest_ix].ft);
+ } else {
+ dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
+ }
+ dest_ix++;
+ }
+
+ if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
+ dest_ix++;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ flow_act.modify_hdr = attr->modify_hdr;
+
+ mutex_lock(&tc->t_lock);
+ if (IS_ERR_OR_NULL(tc->t)) {
+ /* Create the root table here if it doesn't exist yet */
+ tc->t =
+ mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
+
+ if (IS_ERR(tc->t)) {
+ mutex_unlock(&tc->t_lock);
+ netdev_err(priv->netdev,
+ "Failed to create tc offload table\n");
+ rule = ERR_CAST(tc->t);
+ goto err_ft_get;
+ }
+ }
+ mutex_unlock(&tc->t_lock);
+
+ if (attr->chain || attr->prio)
+ ft = mlx5_chains_get_table(nic_chains,
+ attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+ else
+ ft = attr->ft;
+
+ if (IS_ERR(ft)) {
+ rule = ERR_CAST(ft);
+ goto err_ft_get;
+ }
+
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
+ rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act, dest, dest_ix);
+ if (IS_ERR(rule))
+ goto err_rule;
+
+ return rule;
+
+err_rule:
+ if (attr->chain || attr->prio)
+ mlx5_chains_put_table(nic_chains,
+ attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+err_ft_get:
+ if (attr->dest_chain)
+ mlx5_chains_put_table(nic_chains,
+ attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+
+ return ERR_CAST(rule);
+}
+
+static int
+alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
+ struct mlx5_flow_attr *attr)
+
+{
+ struct mlx5_fc *counter;
+
+ counter = mlx5_fc_create(counter_dev, true);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
+
+ attr->counter = counter;
+ return 0;
+}
+
+static int
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_core_dev *dev = priv->mdev;
+ int err;
+
+ parse_attr = attr->parse_attr;
+
+ if (flow_flag_test(flow, HAIRPIN)) {
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
+ if (err)
+ return err;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = alloc_flow_attr_counter(dev, attr);
+ if (err)
+ return err;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ if (err)
+ return err;
+ }
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
+ attr, &parse_attr->mod_hdr_acts);
+ else
+ flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
+ attr);
+
+ return PTR_ERR_OR_ZERO(flow->rule[0]);
+}
+
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_fs_chains *nic_chains;
+
+ nic_chains = mlx5e_nic_chains(tc);
+ mlx5_del_flow_rules(rule);
+
+ if (attr->chain || attr->prio)
+ mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+
+ if (attr->dest_chain)
+ mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+}
+
+static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_flow_attr *attr = flow->attr;
+
+ flow_flag_clear(flow, OFFLOADED);
+
+ if (attr->flags & MLX5_ATTR_FLAG_CT)
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
+ else if (!IS_ERR_OR_NULL(flow->rule[0]))
+ mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
+
+ /* Remove the root table if no rules are left, to avoid
+ * extra steering hops.
+ */
+ mutex_lock(&tc->t_lock);
+ if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
+ !IS_ERR_OR_NULL(tc->t)) {
+ mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
+ tc->t = NULL;
+ }
+ mutex_unlock(&tc->t_lock);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
+ mlx5e_detach_mod_hdr(priv, flow);
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(priv->mdev, attr->counter);
+
+ if (flow_flag_test(flow, HAIRPIN))
+ mlx5e_hairpin_flow_del(priv, flow);
+
+ free_flow_post_acts(flow);
+
+ kvfree(attr->parse_attr);
+ kfree(flow->attr);
+}
+
+struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_handle *rule;
+
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+ rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
+
+ if (IS_ERR(rule))
+ return rule;
+
+ if (attr->esw_attr->split_count) {
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
+ if (IS_ERR(flow->rule[1]))
+ goto err_rule1;
+ }
+
+ return rule;
+
+err_rule1:
+ mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
+ return flow->rule[1];
+}
+
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
+{
+ flow_flag_clear(flow, OFFLOADED);
+
+ if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
+ return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+
+ if (attr->esw_attr->split_count)
+ mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
+
+ mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
+}
+
+struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct mlx5e_mod_hdr_handle *mh = NULL;
+ struct mlx5_flow_attr *slow_attr;
+ struct mlx5_flow_handle *rule;
+ bool fwd_and_modify_cap;
+ u32 chain_mapping = 0;
+ int err;
+
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!slow_attr)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ slow_attr->esw_attr->split_count = 0;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
+ if (!fwd_and_modify_cap)
+ goto skip_restore;
+
+ err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
+ if (err)
+ goto err_get_chain;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ CHAIN_TO_REG, chain_mapping);
+ if (err)
+ goto err_reg_set;
+
+ mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
+ MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
+ if (IS_ERR(mh)) {
+ err = PTR_ERR(mh);
+ goto err_attach;
+ }
+
+ slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);
+
+skip_restore:
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto err_offload;
+ }
+
+ flow->slow_mh = mh;
+ flow->chain_mapping = chain_mapping;
+ flow_flag_set(flow, SLOW);
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ kfree(slow_attr);
+
+ return rule;
+
+err_offload:
+ if (fwd_and_modify_cap)
+ mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
+err_attach:
+err_reg_set:
+ if (fwd_and_modify_cap)
+ mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
+err_get_chain:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ kfree(slow_attr);
+ return ERR_PTR(err);
+}
+
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_flow_attr *slow_attr;
+
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!slow_attr) {
+ mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
+ return;
+ }
+
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ slow_attr->esw_attr->split_count = 0;
+ slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+ if (flow->slow_mh) {
+ slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ slow_attr->modify_hdr = mlx5e_mod_hdr_get(flow->slow_mh);
+ }
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
+ if (flow->slow_mh) {
+ mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), flow->slow_mh);
+ mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
+ flow->chain_mapping = 0;
+ flow->slow_mh = NULL;
+ }
+ flow_flag_clear(flow, SLOW);
+ kfree(slow_attr);
+}
+
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
+ * function.
+ */
+static void unready_flow_add(struct mlx5e_tc_flow *flow,
+ struct list_head *unready_flows)
+{
+ flow_flag_set(flow, NOT_READY);
+ list_add_tail(&flow->unready, unready_flows);
+}
+
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
+ * function.
+ */
+static void unready_flow_del(struct mlx5e_tc_flow *flow)
+{
+ list_del(&flow->unready);
+ flow_flag_clear(flow, NOT_READY);
+}
+
+static void add_unready_flow(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5_eswitch *esw;
+
+ esw = flow->priv->mdev->priv.eswitch;
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &rpriv->uplink_priv;
+
+ mutex_lock(&uplink_priv->unready_flows_lock);
+ unready_flow_add(flow, &uplink_priv->unready_flows);
+ mutex_unlock(&uplink_priv->unready_flows_lock);
+}
+
+static void remove_unready_flow(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5_eswitch *esw;
+
+ esw = flow->priv->mdev->priv.eswitch;
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &rpriv->uplink_priv;
+
+ mutex_lock(&uplink_priv->unready_flows_lock);
+ if (flow_flag_test(flow, NOT_READY))
+ unready_flow_del(flow);
+ mutex_unlock(&uplink_priv->unready_flows_lock);
+}
+
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
+{
+ struct mlx5_core_dev *out_mdev, *route_mdev;
+ struct mlx5e_priv *out_priv, *route_priv;
+
+ out_priv = netdev_priv(out_dev);
+ out_mdev = out_priv->mdev;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ if (out_mdev->coredev_type != MLX5_COREDEV_PF)
+ return false;
+
+ if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
+ route_mdev->coredev_type != MLX5_COREDEV_SF)
+ return false;
+
+ return mlx5e_same_hw_devs(out_priv, route_priv);
+}
+
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+{
+ struct mlx5e_priv *out_priv, *route_priv;
+ struct mlx5_core_dev *route_mdev;
+ struct mlx5_eswitch *esw;
+ u16 vhca_id;
+
+ out_priv = netdev_priv(out_dev);
+ esw = out_priv->mdev->priv.eswitch;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+ if (mlx5_lag_is_active(out_priv->mdev)) {
+ struct mlx5_devcom *devcom;
+ int err;
+
+ /* In the LAG case we may get devices from different eswitch instances.
+ * If we failed to get the vport number, it most likely means that we are
+ * on the wrong eswitch.
+ */
+ err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ if (err != -ENOENT)
+ return err;
+
+ rcu_read_lock();
+ devcom = out_priv->mdev->priv.devcom;
+ esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
+ rcu_read_unlock();
+
+ return err;
+ }
+
+ return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+}
+
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+ struct mlx5_modify_hdr *mod_hdr;
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev,
+ mlx5e_get_flow_namespace(flow),
+ mod_hdr_acts->num_actions,
+ mod_hdr_acts->actions);
+ if (IS_ERR(mod_hdr))
+ return PTR_ERR(mod_hdr);
+
+ WARN_ON(attr->modify_hdr);
+ attr->modify_hdr = mod_hdr;
+
+ return 0;
+}
+
+static int
+set_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack,
+ bool *vf_tun)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct net_device *encap_dev = NULL;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *out_priv;
+ int out_index;
+ int err = 0;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return 0;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ struct net_device *out_dev;
+ int mirred_ifindex;
+
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
+ out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+ if (!out_dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+ err = -ENODEV;
+ goto out;
+ }
+ err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
+ extack, &encap_dev);
+ dev_put(out_dev);
+ if (err)
+ goto out;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ out_priv = netdev_priv(encap_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[out_index].rep = rpriv->rep;
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
+ }
+
+ if (*vf_tun && esw_attr->out_count > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+static void
+clean_encap_dests(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ bool *vf_tun)
+{
+ struct mlx5_esw_flow_attr *esw_attr;
+ int out_index;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ mlx5e_detach_encap(priv, flow, attr, out_index);
+ kfree(attr->parse_attr->tun_info[out_index]);
+ }
+}
+
+static int
+mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ u32 max_prio, max_chain;
+ bool vf_tun;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ /* We check chain range only for tc flows.
+ * For ft flows, we checked attr->chain was originally 0 and set it to
+ * FDB_FT_CHAIN which is outside tc range.
+ * See mlx5e_rep_setup_ft_cb().
+ */
+ max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
+ if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested chain is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
+ if (attr->prio > max_prio) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested priority is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (flow_flag_test(flow, TUN_RX)) {
+ err = mlx5e_attach_decap_route(priv, flow);
+ if (err)
+ goto err_out;
+
+ if (!attr->chain && esw_attr->int_port &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ /* If the decap route device is an internal port, change the
+ * source vport value in reg_c0 back to the uplink, just in
+ * case the rule performs goto chain > 0. If we have a miss
+ * on chain > 0, we want the metadata regs to hold the
+ * chain id so SW will resume handling of this packet
+ * from the proper chain.
+ */
+ u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
+ esw_attr->in_rep->vport);
+
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
+ metadata);
+ if (err)
+ goto err_out;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+ }
+
+ if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
+ err = mlx5e_attach_decap(priv, flow, extack);
+ if (err)
+ goto err_out;
+ }
+
+ if (netif_is_ovs_master(parse_attr->filter_dev)) {
+ struct mlx5e_tc_int_port *int_port;
+
+ if (attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Internal port rule is only supported on chain 0");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (attr->dest_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Internal port rule offload doesn't support goto action");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
+ parse_attr->filter_dev->ifindex,
+ flow_flag_test(flow, EGRESS) ?
+ MLX5E_TC_INT_PORT_EGRESS :
+ MLX5E_TC_INT_PORT_INGRESS);
+ if (IS_ERR(int_port)) {
+ err = PTR_ERR(int_port);
+ goto err_out;
+ }
+
+ esw_attr->int_port = int_port;
+ }
+
+ err = set_encap_dests(priv, flow, attr, extack, &vf_tun);
+ if (err)
+ goto err_out;
+
+ err = mlx5_eswitch_add_vlan_action(esw, attr);
+ if (err)
+ goto err_out;
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ if (vf_tun) {
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
+ if (err)
+ goto err_out;
+ } else {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ if (err)
+ goto err_out;
+ }
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
+ if (err)
+ goto err_out;
+ }
+
+ /* we get here if one of the following takes place:
+ * (1) there's no error
+ * (2) there's an encap action and we don't have a valid neigh entry
+ */
+ if (flow_flag_test(flow, SLOW))
+ flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
+ else
+ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
+
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ goto err_out;
+ }
+ flow_flag_set(flow, OFFLOADED);
+
+ return 0;
+
+err_out:
+ flow_flag_set(flow, FAILED);
+ return err;
+}
+
+static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
+ void *headers_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ misc_parameters_3);
+ u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
+ headers_v,
+ geneve_tlv_option_0_data);
+
+ return !!geneve_tlv_opt_0_data;
+}
+
+static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ bool vf_tun;
+
+ esw_attr = attr->esw_attr;
+ mlx5e_put_flow_tunnel_id(flow);
+
+ remove_unready_flow(flow);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ if (flow_flag_test(flow, SLOW))
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+ }
+ complete_all(&flow->del_hw_done);
+
+ if (mlx5_flow_has_geneve_opt(flow))
+ mlx5_geneve_tlv_option_del(priv->mdev->geneve);
+
+ mlx5_eswitch_del_vlan_action(esw, attr);
+
+ if (flow->decap_route)
+ mlx5e_detach_decap_route(priv, flow);
+
+ clean_encap_dests(priv, flow, attr, &vf_tun);
+
+ mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
+ if (vf_tun && attr->modify_hdr)
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ else
+ mlx5e_detach_mod_hdr(priv, flow);
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
+
+ if (esw_attr->int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
+
+ if (esw_attr->dest_int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
+
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
+ mlx5e_detach_decap(priv, flow);
+
+ free_flow_post_acts(flow);
+
+ if (flow->attr->lag.count)
+ mlx5_lag_del_mpesw_rule(esw->dev);
+
+ kvfree(attr->esw_attr->rx_tun_attr);
+ kvfree(attr->parse_attr);
+ kfree(flow->attr);
+}
+
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_flow_attr *attr;
+
+ attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
+ return attr->counter;
+}
+
+/* Iterate over tmp_list of flows attached to flow_list head. */
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
+{
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
+ mlx5e_flow_put(priv, flow);
+}
+
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+ if (!flow_flag_test(flow, ESWITCH) ||
+ !flow_flag_test(flow, DUP))
+ return;
+
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_del(&flow->peer);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+ flow_flag_clear(flow, DUP);
+
+ if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+ kfree(flow->peer_flow);
+ }
+
+ flow->peer_flow = NULL;
+}
+
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_core_dev *dev = flow->priv->mdev;
+ struct mlx5_devcom *devcom = dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return;
+
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (mlx5e_is_eswitch_flow(flow)) {
+ mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ } else {
+ mlx5e_tc_del_nic_flow(priv, flow);
+ }
+}
+
+static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_action *flow_action = &rule->action;
+ const struct flow_action_entry *act;
+ int i;
+
+ if (chain)
+ return false;
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_GOTO:
+ return true;
+ case FLOW_ACTION_SAMPLE:
+ return true;
+ default:
+ continue;
+ }
+ }
+
+ return false;
+}
+
+static int
+enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
+ struct flow_dissector_key_enc_opts *opts,
+ struct netlink_ext_ack *extack,
+ bool *dont_care)
+{
+ struct geneve_opt *opt;
+ int off = 0;
+
+ *dont_care = true;
+
+ while (opts->len > off) {
+ opt = (struct geneve_opt *)&opts->data[off];
+
+ if (!(*dont_care) || opt->opt_class || opt->type ||
+ memchr_inv(opt->opt_data, 0, opt->length * 4)) {
+ *dont_care = false;
+
+ if (opt->opt_class != htons(U16_MAX) ||
+ opt->type != U8_MAX) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Partial match of tunnel options in chain > 0 isn't supported");
+ netdev_warn(priv->netdev,
+ "Partial match of tunnel options in chain > 0 isn't supported");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ off += sizeof(struct geneve_opt) + opt->length * 4;
+ }
+
+ return 0;
+}
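+
+/* Illustration of the rule enforced above: an all-zero enc_opts mask is
+ * treated as "don't care"; once any option byte is masked, the option class
+ * mask must be 0xffff and the type mask 0xff, i.e. only exact-match or fully
+ * wildcarded tunnel options are accepted for chains > 0.
+ */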
+
+#define COPY_DISSECTOR(rule, diss_key, dst)\
+({ \
+ struct flow_rule *__rule = (rule);\
+ typeof(dst) __dst = dst;\
+\
+ memcpy(__dst,\
+ skb_flow_dissector_target(__rule->match.dissector,\
+ diss_key,\
+ __rule->match.key),\
+ sizeof(*__dst));\
+})
+
+static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct flow_cls_offload *f,
+ struct net_device *filter_dev)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
+ struct flow_match_enc_opts enc_opts_match;
+ struct tunnel_match_enc_opts tun_enc_opts;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct tunnel_match_key tunnel_key;
+ bool enc_opts_is_dont_care = true;
+ u32 tun_id, enc_opts_id = 0;
+ struct mlx5_eswitch *esw;
+ u32 value, mask;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ memset(&tunnel_key, 0, sizeof(tunnel_key));
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ &tunnel_key.enc_control);
+ if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ &tunnel_key.enc_ipv4);
+ else
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ &tunnel_key.enc_ipv6);
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
+ &tunnel_key.enc_tp);
+ COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
+ &tunnel_key.enc_key_id);
+ tunnel_key.filter_ifindex = filter_dev->ifindex;
+
+ err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
+ if (err)
+ return err;
+
+ flow_rule_match_enc_opts(rule, &enc_opts_match);
+ err = enc_opts_is_dont_care_or_full_match(priv,
+ enc_opts_match.mask,
+ extack,
+ &enc_opts_is_dont_care);
+ if (err)
+ goto err_enc_opts;
+
+ if (!enc_opts_is_dont_care) {
+ memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
+ memcpy(&tun_enc_opts.key, enc_opts_match.key,
+ sizeof(*enc_opts_match.key));
+ memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
+ sizeof(*enc_opts_match.mask));
+
+ err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
+ &tun_enc_opts, &enc_opts_id);
+ if (err)
+ goto err_enc_opts;
+ }
+
+ value = tun_id << ENC_OPTS_BITS | enc_opts_id;
+ mask = enc_opts_id ? TUNNEL_ID_MASK :
+ (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
+
+ if (attr->chain) {
+ mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
+ TUNNEL_TO_REG, value, mask);
+ } else {
+ mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+ err = mlx5e_tc_match_to_reg_set(priv->mdev,
+ mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
+ TUNNEL_TO_REG, value);
+ if (err)
+ goto err_set;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+
+ flow->attr->tunnel_id = value;
+ return 0;
+
+err_set:
+ if (enc_opts_id)
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id);
+err_enc_opts:
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
+ return err;
+}
+
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
+{
+ u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
+ u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5_eswitch *esw;
+
+ esw = flow->priv->mdev->priv.eswitch;
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ if (tun_id)
+ mapping_remove(uplink_priv->tunnel_mapping, tun_id);
+ if (enc_opts_id)
+ mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id);
+}
+
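+/* Summary of the helper below: when the device supports matching on the parsed
+ * ip_version field (outer or inner, per the FLOWTABLE caps) and the filter
+ * fully masks n_proto as IPv4 or IPv6, match on ip_version (4 or 6) instead of
+ * the raw ethertype; otherwise fall back to an ethertype match.
+ */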
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+ struct flow_match_basic *match, bool outer,
+ void *headers_c, void *headers_v)
+{
+ bool ip_version_cap;
+
+ ip_version_cap = outer ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+
+ if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
+ (match->key->n_proto == htons(ETH_P_IP) ||
+ match->key->n_proto == htons(ETH_P_IPV6))) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
+ match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ ntohs(match->mask->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ntohs(match->key->n_proto));
+ }
+}
+
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
+{
+ void *headers_v;
+ u16 ethertype;
+ u8 ip_version;
+
+ if (outer)
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+ else
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+
+ ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
+	/* Fall back to deriving the IP version from the ethertype when ip_version isn't set */
+ if (!ip_version) {
+ ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+ if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
+ ip_version = 4;
+ else if (ethertype == ETH_P_IPV6)
+ ip_version = 6;
+ }
+ return ip_version;
+}
+
+/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
+ * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
+ * +---------+----------------------------------------+
+ * |Arriving | Arriving Outer Header |
+ * | Inner +---------+---------+---------+----------+
+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
+ * +---------+---------+---------+---------+----------+
+ * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
+ * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
+ * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
+ * | CE | CE | CE | CE | CE |
+ * +---------+---------+---------+---------+----------+
+ *
+ * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
+ * the inner ip_ecn value before hardware decap action.
+ *
+ * Cells marked with an asterisk (*) are changed from the original inner ip_ecn value
+ * during decap, so matching those values on the inner ip_ecn before decap would fail.
+ *
+ * The following helper allows offload only when the inner ip_ecn won't be changed by
+ * the outer ip_ecn, except for outer ip_ecn = CE: in that case the inner ip_ecn is
+ * always changed to CE, so we can simply drop the inner ip_ecn=CE match.
+ */
+
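+/* A few concrete outcomes of the check below, following the table above:
+ * - enc_tos ecn fully masked as ECT(1): rejected, decap may rewrite the inner ecn.
+ * - enc_tos ecn = ECT(0) with tos ecn fully masked: offloaded, inner ecn is preserved.
+ * - enc_tos ecn = CE and tos ecn = CE: offloaded, but the inner ecn match is dropped
+ *   (*match_inner_ecn = false) since decap forces the inner ecn to CE anyway.
+ * - tos ecn matched without any enc_tos ecn match: rejected.
+ */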
+static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f,
+ bool *match_inner_ecn)
+{
+ u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ip match;
+
+ *match_inner_ecn = true;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ flow_rule_match_enc_ip(rule, &match);
+ outer_ecn_key = match.key->tos & INET_ECN_MASK;
+ outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+ flow_rule_match_ip(rule, &match);
+ inner_ecn_key = match.key->tos & INET_ECN_MASK;
+ inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
+ }
+
+ if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
+ NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!outer_ecn_mask) {
+ if (!inner_ecn_mask)
+ return 0;
+
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev,
+ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
+ netdev_warn(priv->netdev,
+ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!inner_ecn_mask)
+ return 0;
+
+ /* Both inner and outer have full mask on ecn */
+
+ if (outer_ecn_key == INET_ECN_ECT_1) {
+ /* inner ecn might change by DECAP action */
+
+ NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
+ netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (outer_ecn_key != INET_ECN_CE)
+ return 0;
+
+ if (inner_ecn_key != INET_ECN_CE) {
+ /* Can't happen in software, as packet ecn will be changed to CE after decap */
+ NL_SET_ERR_MSG_MOD(extack,
+ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
+ netdev_warn(priv->netdev,
+ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase,
+ * drop match on inner ecn
+ */
+ *match_inner_ecn = false;
+
+ return 0;
+}
+
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct net_device *filter_dev,
+ u8 *match_level,
+ bool *match_inner)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct netlink_ext_ack *extack = f->common.extack;
+ bool needs_mapping, sets_mapping;
+ int err;
+
+ if (!mlx5e_is_eswitch_flow(flow)) {
+ NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ needs_mapping = !!flow->attr->chain;
+ sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
+ *match_inner = !needs_mapping;
+
+ if ((needs_mapping || sets_mapping) &&
+ !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Chains on tunnel devices isn't supported without register loopback support");
+ netdev_warn(priv->netdev,
+ "Chains on tunnel devices isn't supported without register loopback support");
+ return -EOPNOTSUPP;
+ }
+
+ if (!flow->attr->chain) {
+ err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
+ match_level);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to parse tunnel attributes");
+ netdev_warn(priv->netdev,
+ "Failed to parse tunnel attributes");
+ return err;
+ }
+
+ /* With mpls over udp we decapsulate using packet reformat
+ * object
+ */
+ if (!netif_is_bareudp(filter_dev))
+ flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ err = mlx5e_tc_set_attr_rx_tun(flow, spec);
+ if (err)
+ return err;
+ } else if (tunnel) {
+ struct mlx5_flow_spec *tmp_spec;
+
+ tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
+ if (!tmp_spec) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
+ netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
+ return -ENOMEM;
+ }
+ memcpy(tmp_spec, spec, sizeof(*tmp_spec));
+
+ err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
+ if (err) {
+ kvfree(tmp_spec);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
+ netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
+ return err;
+ }
+ err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
+ kvfree(tmp_spec);
+ if (err)
+ return err;
+ }
+
+ if (!needs_mapping && !sets_mapping)
+ return 0;
+
+ return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
+}
+
+static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ inner_headers);
+}
+
+static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ inner_headers);
+}
+
+static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+}
+
+static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
+{
+ return MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+}
+
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
+{
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+ get_match_inner_headers_value(spec) :
+ get_match_outer_headers_value(spec);
+}
+
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
+{
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+ get_match_inner_headers_criteria(spec) :
+ get_match_outer_headers_criteria(spec);
+}
+
+static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct net_device *ingress_dev;
+ struct flow_match_meta match;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
+ return 0;
+
+ flow_rule_match_meta(rule, &match);
+ if (!match.mask->ingress_ifindex)
+ return 0;
+
+ if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
+ return -EOPNOTSUPP;
+ }
+
+ ingress_dev = __dev_get_by_index(dev_net(filter_dev),
+ match.key->ingress_ifindex);
+ if (!ingress_dev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't find the ingress port to match on");
+ return -ENOENT;
+ }
+
+ if (ingress_dev != filter_dev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't match on the ingress filter port");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool skip_key_basic(struct net_device *filter_dev,
+ struct flow_cls_offload *f)
+{
+ /* When doing mpls over udp decap, the user needs to provide
+ * MPLS_UC as the protocol in order to be able to match on mpls
+ * label fields. However, the actual ethertype is IP so we want to
+ * avoid matching on this, otherwise we'll fail the match.
+ */
+ if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
+ return true;
+
+ return false;
+}
+
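+/* The parser below translates flower dissector keys into the mlx5 flow spec.
+ * For filters on tunnel devices, parse_tunnel_attr() handles the tunnel part;
+ * when the tunnel is matched directly (chain 0) the remaining L2-L4 keys are
+ * written to the inner headers and tracked via inner_match_level, otherwise
+ * they are written to the outer headers and tracked via outer_match_level.
+ */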
+static int __parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct net_device *filter_dev,
+ u8 *inner_match_level, u8 *outer_match_level)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_3);
+ void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_3);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_dissector *dissector = rule->match.dissector;
+ enum fs_flow_table_type fs_type;
+ bool match_inner_ecn = true;
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+ u8 *match_level;
+ int err;
+
+ fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
+ match_level = outer_match_level;
+
+ if (dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_META) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_CVLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_TCP) |
+ BIT(FLOW_DISSECTOR_KEY_IP) |
+ BIT(FLOW_DISSECTOR_KEY_CT) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+ BIT(FLOW_DISSECTOR_KEY_ICMP) |
+ BIT(FLOW_DISSECTOR_KEY_MPLS))) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
+ netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
+ dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if (mlx5e_get_tc_tun(filter_dev)) {
+ bool match_inner = false;
+
+ err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
+ outer_match_level, &match_inner);
+ if (err)
+ return err;
+
+ if (match_inner) {
+ /* header pointers should point to the inner headers
+ * if the packet was decapsulated already.
+ * outer headers are set by parse_tunnel_attr.
+ */
+ match_level = inner_match_level;
+ headers_c = get_match_inner_headers_criteria(spec);
+ headers_v = get_match_inner_headers_value(spec);
+ }
+
+ err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_flower_parse_meta(filter_dev, f);
+ if (err)
+ return err;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
+ !skip_key_basic(filter_dev, f)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+ mlx5e_tc_set_ethertype(priv->mdev, &match,
+ match_level == outer_match_level,
+ headers_c, headers_v);
+
+ if (match.mask->n_proto)
+ *match_level = MLX5_MATCH_L2;
+ }
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
+ is_vlan_dev(filter_dev)) {
+ struct flow_dissector_key_vlan filter_dev_mask;
+ struct flow_dissector_key_vlan filter_dev_key;
+ struct flow_match_vlan match;
+
+ if (is_vlan_dev(filter_dev)) {
+ match.key = &filter_dev_key;
+ match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
+ match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
+ match.key->vlan_priority = 0;
+ match.mask = &filter_dev_mask;
+ memset(match.mask, 0xff, sizeof(*match.mask));
+ match.mask->vlan_priority = 0;
+ } else {
+ flow_rule_match_vlan(rule, &match);
+ }
+ if (match.mask->vlan_id ||
+ match.mask->vlan_priority ||
+ match.mask->vlan_tpid) {
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ svlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ svlan_tag, 1);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+ }
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
+ match.mask->vlan_id);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
+ match.key->vlan_id);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
+ match.mask->vlan_priority);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
+ match.key->vlan_priority);
+
+ *match_level = MLX5_MATCH_L2;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
+ match.mask->vlan_eth_type &&
+ MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
+ ft_field_support.outer_second_vid,
+ fs_type)) {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_cvlan_tag, 1);
+ spec->match_criteria_enable |=
+ MLX5_MATCH_MISC_PARAMETERS;
+ }
+ }
+ } else if (*match_level != MLX5_MATCH_NONE) {
+		/* cvlan_tag enabled in the match criteria and
+		 * disabled in the match value means neither an S-tag
+		 * nor a C-tag is present (the packet is untagged)
+ */
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+ struct flow_match_vlan match;
+
+ flow_rule_match_cvlan(rule, &match);
+ if (match.mask->vlan_id ||
+ match.mask->vlan_priority ||
+ match.mask->vlan_tpid) {
+ if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
+ fs_type)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on CVLAN is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_svlan_tag, 1);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ outer_second_svlan_tag, 1);
+ } else {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_cvlan_tag, 1);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ outer_second_cvlan_tag, 1);
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
+ match.mask->vlan_id);
+ MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
+ match.key->vlan_id);
+ MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
+ match.mask->vlan_priority);
+ MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
+ match.key->vlan_priority);
+
+ *match_level = MLX5_MATCH_L2;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_match_eth_addrs match;
+
+ flow_rule_match_eth_addrs(rule, &match);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ match.mask->dst);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ match.key->dst);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ match.mask->src);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ match.key->src);
+
+ if (!is_zero_ether_addr(match.mask->src) ||
+ !is_zero_ether_addr(match.mask->dst))
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+
+ /* the HW doesn't support frag first/later */
+ if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
+ NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+ match.key->flags & FLOW_DIS_IS_FRAGMENT);
+
+ /* the HW doesn't need L3 inline to match on frag=no */
+ if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
+ *match_level = MLX5_MATCH_L2;
+ /* *** L2 attributes parsing up to here *** */
+ else
+ *match_level = MLX5_MATCH_L3;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+ ip_proto = match.key->ip_proto;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ match.mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ match.key->ip_proto);
+
+ if (match.mask->ip_proto)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.key->src, sizeof(match.key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.key->dst, sizeof(match.key->dst));
+
+ if (match.mask->src || match.mask->dst)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, sizeof(match.key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, sizeof(match.key->dst));
+
+ if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
+ ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_match_ip match;
+
+ flow_rule_match_ip(rule, &match);
+ if (match_inner_ecn) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
+ }
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+ match.mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+ match.key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+ match.mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+ match.key->ttl);
+
+ if (match.mask->ttl &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_ipv4_ttl)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on TTL is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (match.mask->tos || match.mask->ttl)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ /* *** L3 attributes parsing up to here *** */
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(match.key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(match.key->dst));
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only UDP and TCP transports are supported for L4 matching");
+ netdev_err(priv->netdev,
+				   "Only UDP and TCP transports are supported\n");
+ return -EINVAL;
+ }
+
+ if (match.mask->src || match.mask->dst)
+ *match_level = MLX5_MATCH_L4;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+
+ flow_rule_match_tcp(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(match.mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(match.key->flags));
+
+ if (match.mask->flags)
+ *match_level = MLX5_MATCH_L4;
+ }
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
+ struct flow_match_icmp match;
+
+ flow_rule_match_icmp(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_ICMP)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Match on Flex protocols for ICMP is not supported");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
+ match.mask->type);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
+ match.key->type);
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
+ match.mask->code);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
+ match.key->code);
+ break;
+ case IPPROTO_ICMPV6:
+ if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_ICMPV6)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Match on Flex protocols for ICMPV6 is not supported");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
+ match.mask->type);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
+ match.key->type);
+ MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
+ match.mask->code);
+ MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
+ match.key->code);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Code and type matching only with ICMP and ICMPv6");
+ netdev_err(priv->netdev,
+ "Code and type matching only with ICMP and ICMPv6\n");
+ return -EINVAL;
+ }
+ if (match.mask->code || match.mask->type) {
+ *match_level = MLX5_MATCH_L4;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+ }
+ }
+ /* Currently supported only for MPLS over UDP */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
+ !netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on MPLS is supported only for MPLS over UDP");
+ netdev_err(priv->netdev,
+ "Matching on MPLS is supported only for MPLS over UDP\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct net_device *filter_dev)
+{
+ u8 inner_match_level, outer_match_level, non_tunnel_match_level;
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+ bool is_eswitch_flow;
+ int err;
+
+ inner_match_level = MLX5_MATCH_NONE;
+ outer_match_level = MLX5_MATCH_NONE;
+
+ err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
+ &inner_match_level, &outer_match_level);
+ non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
+ outer_match_level : inner_match_level;
+
+ is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
+ if (!err && is_eswitch_flow) {
+ rep = rpriv->rep;
+ if (rep->vport != MLX5_VPORT_UPLINK &&
+ (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
+ esw->offloads.inline_mode < non_tunnel_match_level)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Flow is not offloaded due to min inline setting");
+ netdev_warn(priv->netdev,
+ "Flow is not offloaded due to min inline setting, required %d actual %d\n",
+ non_tunnel_match_level, esw->offloads.inline_mode);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ flow->attr->inner_match_level = inner_match_level;
+ flow->attr->outer_match_level = outer_match_level;
+
+
+ return err;
+}
+
+struct mlx5_fields {
+ u8 field;
+ u8 field_bsize;
+ u32 field_mask;
+ u32 offset;
+ u32 match_offset;
+};
+
+#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
+ {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
+ offsetof(struct pedit_headers, field) + (off), \
+ MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
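+/* Each OFFLOAD() entry ties together three views of one field: the firmware
+ * set_action_in field id (MLX5_ACTION_IN_FIELD_OUT_<fw_field>), the offset of
+ * the corresponding member in struct pedit_headers, and the byte offset of the
+ * matching field in fte_match_set_lyr_2_4. For example, the ETHERTYPE entry in
+ * the fields[] table below maps pedit writes to eth.h_proto onto the HW
+ * ethertype field and its match offset.
+ */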
+
+/* masked values are the same and there are no rewrites that do not have a
+ * match.
+ */
+#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
+ type matchmaskx = *(type *)(matchmaskp); \
+ type matchvalx = *(type *)(matchvalp); \
+ type maskx = *(type *)(maskp); \
+ type valx = *(type *)(valp); \
+ \
+ (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
+ matchmaskx)); \
+})
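+/* SAME_VAL_MASK() is true only when (a) the value being written equals the
+ * value already matched on (each under its own mask) and (b) every bit the
+ * pedit rewrites is also covered by the match mask, i.e. maskx is a subset of
+ * matchmaskx. For example, rewriting the low byte of a field while the rule
+ * matches only the high byte fails (b), so that rewrite is not skipped.
+ */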
+
+static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
+ void *matchmaskp, u8 bsize)
+{
+ bool same = false;
+
+ switch (bsize) {
+ case 8:
+ same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
+ break;
+ case 16:
+ same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
+ break;
+ case 32:
+ same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
+ break;
+ }
+
+ return same;
+}
+
+static struct mlx5_fields fields[] = {
+ OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
+ OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
+ OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
+ OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0),
+ OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype),
+ OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
+
+ OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp),
+ OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit),
+ OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+
+ OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
+ OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
+ OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
+ OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
+ OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
+ OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
+ OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
+ OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
+ OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
+ OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp),
+
+ OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
+ OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
+	/* in the linux tcphdr, the tcp flags field is 8 bits long */
+ OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
+
+ OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
+ OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
+};
+
+static u32 mask_field_get(void *mask, struct mlx5_fields *f)
+{
+ switch (f->field_bsize) {
+ case 32:
+ return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
+ case 16:
+ return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
+ default:
+ return *(u8 *)mask & (u8)f->field_mask;
+ }
+}
+
+static void mask_field_clear(void *mask, struct mlx5_fields *f)
+{
+ switch (f->field_bsize) {
+ case 32:
+ *(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
+ break;
+ case 16:
+ *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
+ break;
+ default:
+ *(u8 *)mask &= ~(u8)f->field_mask;
+ break;
+ }
+}
+
+static int offload_pedit_fields(struct mlx5e_priv *priv,
+ int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
+ void *headers_c, *headers_v, *action, *vals_p;
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ void *s_masks_p, *a_masks_p;
+ int i, first, last, next_z;
+ struct mlx5_fields *f;
+ unsigned long mask;
+ u32 s_mask, a_mask;
+ u8 cmd;
+
+ mod_acts = &parse_attr->mod_hdr_acts;
+ headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
+
+ set_masks = &hdrs[0].masks;
+ add_masks = &hdrs[1].masks;
+ set_vals = &hdrs[0].vals;
+ add_vals = &hdrs[1].vals;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ bool skip;
+
+ f = &fields[i];
+ s_masks_p = (void *)set_masks + f->offset;
+ a_masks_p = (void *)add_masks + f->offset;
+
+ s_mask = mask_field_get(s_masks_p, f);
+ a_mask = mask_field_get(a_masks_p, f);
+
+ if (!s_mask && !a_mask) /* nothing to offload here */
+ continue;
+
+ if (s_mask && a_mask) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't set and add to the same HW field");
+ netdev_warn(priv->netdev,
+ "mlx5: can't set and add to the same HW field (%x)\n",
+ f->field);
+ return -EOPNOTSUPP;
+ }
+
+ skip = false;
+ if (s_mask) {
+ void *match_mask = headers_c + f->match_offset;
+ void *match_val = headers_v + f->match_offset;
+
+ cmd = MLX5_ACTION_TYPE_SET;
+ mask = s_mask;
+ vals_p = (void *)set_vals + f->offset;
+ /* don't rewrite if we have a match on the same value */
+ if (cmp_val_mask(vals_p, s_masks_p, match_val,
+ match_mask, f->field_bsize))
+ skip = true;
+ /* clear to denote we consumed this field */
+ mask_field_clear(s_masks_p, f);
+ } else {
+ cmd = MLX5_ACTION_TYPE_ADD;
+ mask = a_mask;
+ vals_p = (void *)add_vals + f->offset;
+ /* add 0 is no change */
+ if (!mask_field_get(vals_p, f))
+ skip = true;
+ /* clear to denote we consumed this field */
+ mask_field_clear(a_masks_p, f);
+ }
+ if (skip)
+ continue;
+
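+		/* The HW set/add action rewrites a single contiguous run of bits, so
+		 * reject masks with a hole: e.g. a 16-bit mask of 0x0ff0 is contiguous
+		 * (first=4, last=11), while 0x0f0f is not, since find_next_zero_bit()
+		 * lands between the two runs.
+		 */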
+ first = find_first_bit(&mask, f->field_bsize);
+ next_z = find_next_zero_bit(&mask, f->field_bsize, first);
+ last = find_last_bit(&mask, f->field_bsize);
+ if (first < next_z && next_z < last) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "rewrite of few sub-fields isn't supported");
+ netdev_warn(priv->netdev,
+ "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
+ mask);
+ return -EOPNOTSUPP;
+ }
+
+ action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
+ if (IS_ERR(action)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "too many pedit actions, can't offload");
+ mlx5_core_warn(priv->mdev,
+ "mlx5: parsed %d pedit actions, can't do more\n",
+ mod_acts->num_actions);
+ return PTR_ERR(action);
+ }
+
+ MLX5_SET(set_action_in, action, action_type, cmd);
+ MLX5_SET(set_action_in, action, field, f->field);
+
+ if (cmd == MLX5_ACTION_TYPE_SET) {
+ unsigned long field_mask = f->field_mask;
+ int start;
+
+			/* the field may not start at bit 0 of the word (field_mask narrower than the word) */
+ start = find_first_bit(&field_mask, f->field_bsize);
+
+ MLX5_SET(set_action_in, action, offset, first - start);
+ /* length is num of bits to be written, zero means length of 32 */
+ MLX5_SET(set_action_in, action, length, (last - first + 1));
+ }
+
+ if (f->field_bsize == 32)
+ MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
+ else if (f->field_bsize == 16)
+ MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
+ else if (f->field_bsize == 8)
+ MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
+
+ ++mod_acts->num_actions;
+ }
+
+ return 0;
+}
+
+static const struct pedit_headers zero_masks = {};
+
+static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct pedit_headers *cmd_masks;
+ u8 cmd;
+
+ for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
+ cmd_masks = &parse_attr->hdrs[cmd].masks;
+ if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+ NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
+ netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
+ print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
+ 16, 1, cmd_masks, sizeof(zero_masks), true);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action_flags,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
+ if (err)
+ goto out_dealloc_parsed_actions;
+
+ err = verify_offload_pedit_fields(priv, parse_attr, extack);
+ if (err)
+ goto out_dealloc_parsed_actions;
+
+ return 0;
+
+out_dealloc_parsed_actions:
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
+ return err;
+}
+
+struct ip_ttl_word {
+ __u8 ttl;
+ __u8 protocol;
+ __sum16 check;
+};
+
+struct ipv6_hoplimit_word {
+ __be16 payload_len;
+ __u8 nexthdr;
+ __u8 hop_limit;
+};
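+/* The two structs above overlay the 32-bit pedit mangle word that contains the
+ * IPv4 ttl (together with protocol and check) or the IPv6 hop_limit (together
+ * with payload_len and nexthdr). Inspecting the inverted mangle mask through
+ * them lets is_action_keys_supported() tell a pure TTL/hop-limit rewrite apart
+ * from one that also touches other header fields.
+ */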
+
+static bool
+is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
+ bool *modify_ip_header, bool *modify_tuple,
+ struct netlink_ext_ack *extack)
+{
+ u32 mask, offset;
+ u8 htype;
+
+ htype = act->mangle.htype;
+ offset = act->mangle.offset;
+ mask = ~act->mangle.mask;
+	/* For the IPv4 & IPv6 headers, check the 4-byte word that
+	 * contains ttl/hop_limit to determine whether fields other
+	 * than ttl & hop_limit are being modified.
+	 */
+ if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
+ struct ip_ttl_word *ttl_word =
+ (struct ip_ttl_word *)&mask;
+
+ if (offset != offsetof(struct iphdr, ttl) ||
+ ttl_word->protocol ||
+ ttl_word->check) {
+ *modify_ip_header = true;
+ }
+
+ if (offset >= offsetof(struct iphdr, saddr))
+ *modify_tuple = true;
+
+ if (ct_flow && *modify_tuple) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of ipv4 address with action ct");
+ return false;
+ }
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
+ struct ipv6_hoplimit_word *hoplimit_word =
+ (struct ipv6_hoplimit_word *)&mask;
+
+ if (offset != offsetof(struct ipv6hdr, payload_len) ||
+ hoplimit_word->payload_len ||
+ hoplimit_word->nexthdr) {
+ *modify_ip_header = true;
+ }
+
+ if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
+ *modify_tuple = true;
+
+ if (ct_flow && *modify_tuple) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of ipv6 address with action ct");
+ return false;
+ }
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
+ htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
+ *modify_tuple = true;
+ if (ct_flow) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of transport header ports with action ct");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
+ bool ct_flow, struct netlink_ext_ack *extack,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec)
+{
+ if (!modify_tuple || ct_clear)
+ return true;
+
+ if (ct_flow) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload tuple modification with non-clear ct()");
+ netdev_info(priv->netdev,
+ "can't offload tuple modification with non-clear ct()");
+ return false;
+ }
+
+ /* Add ct_state=-trk match so it will be offloaded for non ct flows
+ * (or after clear action), as otherwise, since the tuple is changed,
+ * we can't restore ct state
+ */
+ if (mlx5_tc_ct_add_no_trk_match(spec)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload tuple modification with ct matches and no ct(clear) action");
+ netdev_info(priv->netdev,
+ "can't offload tuple modification with ct matches and no ct(clear) action");
+ return false;
+ }
+
+ return true;
+}
+
+static bool modify_header_match_supported(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_action *flow_action,
+ u32 actions, bool ct_flow,
+ bool ct_clear,
+ struct netlink_ext_ack *extack)
+{
+ const struct flow_action_entry *act;
+ bool modify_ip_header, modify_tuple;
+ void *headers_c;
+ void *headers_v;
+ u16 ethertype;
+ u8 ip_proto;
+ int i;
+
+ headers_c = mlx5e_get_match_headers_criteria(actions, spec);
+ headers_v = mlx5e_get_match_headers_value(actions, spec);
+ ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+
+ /* for non-IP we only re-write MACs, so we're okay */
+ if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
+ ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
+ goto out_ok;
+
+ modify_ip_header = false;
+ modify_tuple = false;
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE &&
+ act->id != FLOW_ACTION_ADD)
+ continue;
+
+ if (!is_action_keys_supported(act, ct_flow,
+ &modify_ip_header,
+ &modify_tuple, extack))
+ return false;
+ }
+
+ if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
+ priv, spec))
+ return false;
+
+ ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
+ if (modify_ip_header && ip_proto != IPPROTO_TCP &&
+ ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of non TCP/UDP");
+ netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
+ ip_proto);
+ return false;
+ }
+
+out_ok:
+ return true;
+}
+
+static bool
+actions_match_supported_fdb(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ bool ct_flow, ct_clear;
+
+ ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
+
+ if (esw_attr->split_count && ct_flow &&
+ !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
+ /* All registers used by ct are cleared when using
+ * split rules.
+ */
+ NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
+ return false;
+ }
+
+ if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "current firmware doesn't support split rule for port mirroring");
+ netdev_warn_once(priv->netdev,
+ "current firmware doesn't support split rule for port mirroring\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+actions_match_supported(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ u32 actions,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ bool ct_flow, ct_clear;
+
+ ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
+
+ if (!(actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
+ return false;
+ }
+
+ if (!(~actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
+ return false;
+ }
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
+ return false;
+ }
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
+ actions, ct_flow, ct_clear, extack))
+ return false;
+
+ if (mlx5e_is_eswitch_flow(flow) &&
+ !actions_match_supported_fdb(priv, parse_attr, flow, extack))
+ return false;
+
+ return true;
+}
+
+static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+{
+ return priv->mdev == peer_priv->mdev;
+}
+
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+{
+ struct mlx5_core_dev *fmdev, *pmdev;
+ u64 fsystem_guid, psystem_guid;
+
+ fmdev = priv->mdev;
+ pmdev = peer_priv->mdev;
+
+ fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
+ psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
+
+ return (fsystem_guid == psystem_guid);
+}
+
+static int
+actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct pedit_headers_action *hdrs = parse_attr->hdrs;
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
+ !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
+ return 0;
+
+ ns_type = mlx5e_get_flow_namespace(flow);
+
+ err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
+ if (err)
+ return err;
+
+ if (parse_attr->mod_hdr_acts.num_actions > 0)
+ return 0;
+
+ /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
+
+ if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
+ return 0;
+
+ if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+ (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
+ attr->esw_attr->split_count = 0;
+
+ return 0;
+}
+
+static struct mlx5_flow_attr*
+mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ u32 attr_sz = ns_to_attr_sz(ns_type);
+ struct mlx5_flow_attr *attr2;
+
+ attr2 = mlx5_alloc_flow_attr(ns_type);
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
+ if (!attr2 || !parse_attr) {
+ kvfree(parse_attr);
+ kfree(attr2);
+ return NULL;
+ }
+
+ memcpy(attr2, attr, attr_sz);
+ INIT_LIST_HEAD(&attr2->list);
+ parse_attr->filter_dev = attr->parse_attr->filter_dev;
+ attr2->action = 0;
+ attr2->flags = 0;
+ attr2->parse_attr = parse_attr;
+ attr2->dest_chain = 0;
+ attr2->dest_ft = NULL;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ attr2->esw_attr->out_count = 0;
+ attr2->esw_attr->split_count = 0;
+ }
+
+ return attr2;
+}
+
+static struct mlx5_core_dev *
+get_flow_counter_dev(struct mlx5e_tc_flow *flow)
+{
+ return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
+}
+
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_attr *attr;
+ int i;
+
+ list_for_each_entry(attr, &flow->attrs, list) {
+ esw_attr = attr->esw_attr;
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
+ return attr;
+ }
+ }
+
+ return NULL;
+}
+
+void
+mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr;
+
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
+
+ mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
+ }
+}
+
+static void
+free_flow_post_acts(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr, *tmp;
+ bool vf_tun;
+
+ list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
+
+ if (attr->post_act_handle)
+ mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
+
+ clean_encap_dests(flow->priv, flow, attr, &vf_tun);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(counter_dev, attr->counter);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
+ if (attr->modify_hdr)
+ mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
+ }
+
+ list_del(&attr->list);
+ kvfree(attr->parse_attr);
+ kfree(attr);
+ }
+}
+
+int
+mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr;
+ int err = 0;
+
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
+
+ err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/* TC filter rule HW translation:
+ *
+ * +---------------------+
+ * + ft prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * |
+ * | if multi table action
+ * |
+ * v
+ * +---------------------+
+ * + post act ft |<----.
+ * + match fte id | | split on multi table action
+ * + do actions |-----'
+ * +---------------------+
+ * |
+ * |
+ * v
+ * Do rest of the actions after last multi table action.
+ */
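+/* For example, a rule whose action list is "ct, mirred egress redirect" is
+ * split as in the diagram above: the rule in the chain's table performs the CT
+ * action and jumps to the post-action table with a unique fte id set, and a
+ * second rule there matches that fte id and performs the redirect. The attrs
+ * list built by parse_tc_actions() holds one mlx5_flow_attr per such segment.
+ */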
+static int
+alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+ struct mlx5_flow_attr *attr, *next_attr = NULL;
+ struct mlx5e_post_act_handle *handle;
+ bool vf_tun;
+ int err;
+
+ /* This is going in reverse order as needed.
+ * The first entry is the last attribute.
+ */
+ list_for_each_entry(attr, &flow->attrs, list) {
+ if (!next_attr) {
+ /* Set counter action on last post act rule. */
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ } else {
+ err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
+ if (err)
+ goto out_free;
+ }
+
+ /* Don't add post_act rule for first attr (last in the list).
+ * It's being handled by the caller.
+ */
+ if (list_is_last(&attr->list, &flow->attrs))
+ break;
+
+ err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
+ if (err)
+ goto out_free;
+
+ err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
+ if (err)
+ goto out_free;
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
+ if (err)
+ goto out_free;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
+ if (err)
+ goto out_free;
+ }
+
+ handle = mlx5e_tc_post_act_add(post_act, attr);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_free;
+ }
+
+ attr->post_act_handle = handle;
+ next_attr = attr;
+ }
+
+ if (flow_flag_test(flow, SLOW))
+ goto out;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err)
+ goto out_free;
+
+out:
+ return 0;
+
+out_free:
+ free_flow_post_acts(flow);
+ return err;
+}
+
+static int
+parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_action flow_action_reorder;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5_flow_attr *attr = flow->attr;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_priv *priv = flow->priv;
+ struct flow_action_entry *act, **_act;
+ struct mlx5e_tc_act *tc_act;
+ int err, i;
+
+ flow_action_reorder.num_entries = flow_action->num_entries;
+ flow_action_reorder.entries = kcalloc(flow_action->num_entries,
+ sizeof(flow_action), GFP_KERNEL);
+ if (!flow_action_reorder.entries)
+ return -ENOMEM;
+
+ mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
+
+ ns_type = mlx5e_get_flow_namespace(flow);
+ list_add(&attr->list, &flow->attrs);
+
+ flow_action_for_each(i, _act, &flow_action_reorder) {
+ act = *_act;
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act) {
+ NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
+ err = -EOPNOTSUPP;
+ goto out_free;
+ }
+
+ if (!tc_act->can_offload(parse_state, act, i, attr)) {
+ err = -EOPNOTSUPP;
+ goto out_free;
+ }
+
+ err = tc_act->parse_action(parse_state, act, priv, attr);
+ if (err)
+ goto out_free;
+
+ parse_state->actions |= attr->action;
+
+ /* Split attr for multi table act if not the last act. */
+ if (tc_act->is_multi_table_act &&
+ tc_act->is_multi_table_act(priv, act, attr) &&
+ i < flow_action_reorder.num_entries - 1) {
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ if (err)
+ goto out_free;
+
+ attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ list_add(&attr->list, &flow->attrs);
+ }
+ }
+
+ kfree(flow_action_reorder.entries);
+
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ if (err)
+ goto out_free_post_acts;
+
+ err = alloc_flow_post_acts(flow, extack);
+ if (err)
+ goto out_free_post_acts;
+
+ return 0;
+
+out_free:
+ kfree(flow_action_reorder.entries);
+out_free_post_acts:
+ free_flow_post_acts(flow);
+
+ return err;
+}
+
+static int
+flow_action_supported(struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ if (!flow_action_has_entries(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
+ return -EINVAL;
+ }
+
+ if (!flow_action_hw_stats_check(flow_action, extack,
+ FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+parse_tc_nic_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_act_parse_state *parse_state;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ int err;
+
+ err = flow_action_supported(flow_action, extack);
+ if (err)
+ return err;
+
+ attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ parse_attr = attr->parse_attr;
+ parse_state = &parse_attr->parse_state;
+ mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
+ parse_state->ct_priv = get_ct_priv(priv);
+
+ err = parse_tc_actions(parse_state, flow_action);
+ if (err)
+ return err;
+
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
+ if (err)
+ return err;
+
+ if (!actions_match_supported(priv, flow_action, parse_state->actions,
+ parse_attr, flow, extack))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
+{
+ struct mlx5e_priv *peer_priv;
+
+ peer_priv = netdev_priv(peer_netdev);
+
+ return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
+ mlx5e_eswitch_vf_rep(priv->netdev) &&
+ mlx5e_eswitch_vf_rep(peer_netdev) &&
+ mlx5e_same_hw_devs(priv, peer_priv));
+}
+
+static bool same_hw_reps(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
+{
+ struct mlx5e_priv *peer_priv;
+
+ peer_priv = netdev_priv(peer_netdev);
+
+ return mlx5e_eswitch_rep(priv->netdev) &&
+ mlx5e_eswitch_rep(peer_netdev) &&
+ mlx5e_same_hw_devs(priv, peer_priv);
+}
+
+static bool is_lag_dev(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
+{
+ return ((mlx5_lag_is_sriov(priv->mdev) ||
+ mlx5_lag_is_multipath(priv->mdev)) &&
+ same_hw_reps(priv, peer_netdev));
+}
+
+static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ if (same_hw_reps(priv, out_dev) &&
+ MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ return true;
+
+ return false;
+}
+
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev)
+{
+ if (is_merged_eswitch_vfs(priv, out_dev))
+ return true;
+
+ if (is_multiport_eligible(priv, out_dev))
+ return true;
+
+ if (is_lag_dev(priv, out_dev))
+ return true;
+
+ return mlx5e_eswitch_rep(out_dev) &&
+ same_port_devs(priv, netdev_priv(out_dev));
+}
+
+int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type,
+ u32 *action,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5e_tc_int_port_priv *int_port_priv;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_int_port *dest_int_port;
+ int err;
+
+ parse_attr = attr->parse_attr;
+ int_port_priv = mlx5e_get_int_port_priv(priv);
+
+ dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
+ if (IS_ERR(dest_int_port))
+ return PTR_ERR(dest_int_port);
+
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
+ mlx5e_tc_int_port_get_metadata(dest_int_port));
+ if (err) {
+ mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
+ return err;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ esw_attr->dest_int_port = dest_int_port;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+ esw_attr->split_count = out_index;
+
+ /* Forward to root fdb for matching against the new source vport */
+ attr->dest_chain = 0;
+
+ return 0;
+}
+
+static int
+parse_tc_fdb_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_act_parse_state *parse_state;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct net_device *filter_dev;
+ int err;
+
+ err = flow_action_supported(flow_action, extack);
+ if (err)
+ return err;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ filter_dev = parse_attr->filter_dev;
+ parse_state = &parse_attr->parse_state;
+ mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
+ parse_state->ct_priv = get_ct_priv(priv);
+
+ err = parse_tc_actions(parse_state, flow_action);
+ if (err)
+ return err;
+
+ /* Forwarding to/from an internal port can have only one destination */
+ if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
+ esw_attr->out_count > 1) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Rules with internal port can have only one destination");
+ return -EOPNOTSUPP;
+ }
+
+ /* Forward from tunnel/internal port to internal port is not supported */
+ if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
+ esw_attr->dest_int_port) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Forwarding from tunnel/internal port to internal port is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
+ if (err)
+ return err;
+
+ if (!actions_match_supported(priv, flow_action, parse_state->actions,
+ parse_attr, flow, extack))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void get_flags(int flags, unsigned long *flow_flags)
+{
+ unsigned long __flow_flags = 0;
+
+ if (flags & MLX5_TC_FLAG(INGRESS))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
+ if (flags & MLX5_TC_FLAG(EGRESS))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
+
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
+ if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
+ if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
+
+ *flow_flags = __flow_flags;
+}
+
+static const struct rhashtable_params tc_ht_params = {
+ .head_offset = offsetof(struct mlx5e_tc_flow, node),
+ .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
+ .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
+ .automatic_shrinking = true,
+};
+
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
+ unsigned long flags)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5e_rep_priv *rpriv;
+
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
+ rpriv = priv->ppriv;
+ return &rpriv->tc_ht;
+ } else /* NIC offload */
+ return &tc->ht;
+}
+
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
+ flow_flag_test(flow, INGRESS);
+ bool act_is_encap = !!(attr->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
+ bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
+ if (!esw_paired)
+ return false;
+
+ if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
+ mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
+ (is_rep_ingress || act_is_encap))
+ return true;
+
+ return false;
+}
+
+struct mlx5_flow_attr *
+mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
+{
+ u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
+ sizeof(struct mlx5_esw_flow_attr) :
+ sizeof(struct mlx5_nic_flow_attr);
+ struct mlx5_flow_attr *attr;
+
+ attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
+ if (!attr)
+ return attr;
+
+ INIT_LIST_HEAD(&attr->list);
+ return attr;
+}
+
+static int
+mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
+ struct flow_cls_offload *f, unsigned long flow_flags,
+ struct mlx5e_tc_flow_parse_attr **__parse_attr,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr;
+ struct mlx5e_tc_flow *flow;
+ int err = -ENOMEM;
+ int out_index;
+
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
+ if (!parse_attr || !flow)
+ goto err_free;
+
+ flow->flags = flow_flags;
+ flow->cookie = f->cookie;
+ flow->priv = priv;
+
+ attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
+ if (!attr)
+ goto err_free;
+
+ flow->attr = attr;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ INIT_LIST_HEAD(&flow->encaps[out_index].list);
+ INIT_LIST_HEAD(&flow->hairpin);
+ INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
+ INIT_LIST_HEAD(&flow->attrs);
+ refcount_set(&flow->refcnt, 1);
+ init_completion(&flow->init_done);
+ init_completion(&flow->del_hw_done);
+
+ *__flow = flow;
+ *__parse_attr = parse_attr;
+
+ return 0;
+
+err_free:
+ kfree(flow);
+ kvfree(parse_attr);
+ return err;
+}
+
+static void
+mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct flow_cls_offload *f)
+{
+ attr->parse_attr = parse_attr;
+ attr->chain = f->common.chain_index;
+ attr->prio = f->common.prio;
+}
+
+static void
+mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
+ struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct flow_cls_offload *f,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ mlx5e_flow_attr_init(attr, parse_attr, f);
+
+ esw_attr->in_rep = in_rep;
+ esw_attr->in_mdev = in_mdev;
+
+ if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
+ MLX5_COUNTER_SOURCE_ESWITCH)
+ esw_attr->counter_dev = in_mdev;
+ else
+ esw_attr->counter_dev = priv->mdev;
+}
+
+static struct mlx5e_tc_flow *
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f,
+ unsigned long flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err;
+
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+ &parse_attr, &flow);
+ if (err)
+ goto out;
+
+ parse_attr->filter_dev = filter_dev;
+ mlx5e_flow_esw_attr_init(flow->attr,
+ priv, parse_attr,
+ f, in_rep, in_mdev);
+
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
+ if (err)
+ goto err_free;
+
+ /* actions validation depends on parsing the ct matches first */
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
+ &flow->attr->ct_attr, extack);
+ if (err)
+ goto err_free;
+
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
+ if (err)
+ goto err_free;
+
+ if (flow->attr->lag.count) {
+ err = mlx5_lag_add_mpesw_rule(esw->dev);
+ if (err)
+ goto err_free;
+ }
+
+ err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
+ complete_all(&flow->init_done);
+ if (err) {
+ if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
+ goto err_lag;
+
+ add_unready_flow(flow);
+ }
+
+ return flow;
+
+err_lag:
+ if (flow->attr->lag.count)
+ mlx5_lag_del_mpesw_rule(esw->dev);
+err_free:
+ mlx5e_flow_put(priv, flow);
+out:
+ return ERR_PTR(err);
+}
+
+static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
+ struct mlx5e_tc_flow *flow,
+ unsigned long flow_flags)
+{
+ struct mlx5e_priv *priv = flow->priv, *peer_priv;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_rep_priv *peer_urpriv;
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5_core_dev *in_mdev;
+ int err = 0;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return -ENODEV;
+
+ peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+ peer_priv = netdev_priv(peer_urpriv->netdev);
+
+ /* in_mdev is assigned to the device the packet originated from.
+ * Packets redirected to the uplink therefore use the same mdev as
+ * the original flow, while packets redirected from the uplink use
+ * the peer mdev.
+ */
+ if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ in_mdev = peer_priv->mdev;
+ else
+ in_mdev = priv->mdev;
+
+ parse_attr = flow->attr->parse_attr;
+ peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
+ parse_attr->filter_dev,
+ attr->in_rep, in_mdev);
+ if (IS_ERR(peer_flow)) {
+ err = PTR_ERR(peer_flow);
+ goto out;
+ }
+
+ flow->peer_flow = peer_flow;
+ flow_flag_set(flow, DUP);
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f,
+ unsigned long flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+ struct mlx5_core_dev *in_mdev = priv->mdev;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+ in_mdev);
+ if (IS_ERR(flow))
+ return PTR_ERR(flow);
+
+ if (is_peer_flow_needed(flow)) {
+ err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
+ if (err) {
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ goto out;
+ }
+ }
+
+ *__flow = flow;
+
+ return 0;
+
+out:
+ return err;
+}
+
+static int
+mlx5e_add_nic_flow(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f,
+ unsigned long flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+ return -EOPNOTSUPP;
+ } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
+ return -EOPNOTSUPP;
+ }
+
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
+ attr_size = sizeof(struct mlx5_nic_flow_attr);
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+ &parse_attr, &flow);
+ if (err)
+ goto out;
+
+ parse_attr->filter_dev = filter_dev;
+ mlx5e_flow_attr_init(flow->attr, parse_attr, f);
+
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
+ if (err)
+ goto err_free;
+
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
+ &flow->attr->ct_attr, extack);
+ if (err)
+ goto err_free;
+
+ err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
+ if (err)
+ goto err_free;
+
+ err = mlx5e_tc_add_nic_flow(priv, flow, extack);
+ if (err)
+ goto err_free;
+
+ flow_flag_set(flow, OFFLOADED);
+ *__flow = flow;
+
+ return 0;
+
+err_free:
+ flow_flag_set(flow, FAILED);
+ mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
+ mlx5e_flow_put(priv, flow);
+out:
+ return err;
+}
+
+static int
+mlx5e_tc_add_flow(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f,
+ unsigned long flags,
+ struct net_device *filter_dev,
+ struct mlx5e_tc_flow **flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long flow_flags;
+ int err;
+
+ get_flags(flags, &flow_flags);
+
+ if (!tc_can_offload_extack(priv->netdev, f->common.extack))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags,
+ filter_dev, flow);
+ else
+ err = mlx5e_add_nic_flow(priv, f, flow_flags,
+ filter_dev, flow);
+
+ return err;
+}
+
+static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
+ struct mlx5e_rep_priv *rpriv)
+{
+ /* An offloaded flow rule is allowed to be duplicated on a non-uplink
+ * representor that shares a tc block with other slaves of a lag device.
+ * rpriv can be NULL if this function is called from NIC mode.
+ */
+ return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
+ struct flow_cls_offload *f, unsigned long flags)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_tc_flow *flow;
+ int err = 0;
+
+ if (!mlx5_esw_hold(priv->mdev))
+ return -EBUSY;
+
+ mlx5_esw_get(priv->mdev);
+
+ rcu_read_lock();
+ flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
+ if (flow) {
+ /* The same flow rule is already offloaded to a non-uplink representor
+ * sharing the tc block; just return 0.
+ */
+ if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
+ goto rcu_unlock;
+
+ NL_SET_ERR_MSG_MOD(extack,
+ "flow cookie already exists, ignoring");
+ netdev_warn_once(priv->netdev,
+ "flow cookie %lx already exists, ignoring\n",
+ f->cookie);
+ err = -EEXIST;
+ goto rcu_unlock;
+ }
+rcu_unlock:
+ rcu_read_unlock();
+ if (flow)
+ goto out;
+
+ trace_mlx5e_configure_flower(f);
+ err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
+ if (err)
+ goto out;
+
+ /* The flow rule is offloaded to a non-uplink representor sharing the
+ * tc block; set the flow's owner dev.
+ */
+ if (is_flow_rule_duplicate_allowed(dev, rpriv))
+ flow->orig_dev = dev;
+
+ err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
+ if (err)
+ goto err_free;
+
+ mlx5_esw_release(priv->mdev);
+ return 0;
+
+err_free:
+ mlx5e_flow_put(priv, flow);
+out:
+ mlx5_esw_put(priv->mdev);
+ mlx5_esw_release(priv->mdev);
+ return err;
+}
+
+static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
+{
+ bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
+ bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
+
+ return flow_flag_test(flow, INGRESS) == dir_ingress &&
+ flow_flag_test(flow, EGRESS) == dir_egress;
+}
+
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
+ struct flow_cls_offload *f, unsigned long flags)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ rcu_read_lock();
+ flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
+ if (!flow || !same_flow_direction(flow, flags)) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ /* Only delete the flow if its DELETED flag has not already been set. */
+ if (flow_flag_test_and_set(flow, DELETED)) {
+ err = -EINVAL;
+ goto errout;
+ }
+ rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
+ rcu_read_unlock();
+
+ trace_mlx5e_delete_flower(f);
+ mlx5e_flow_put(priv, flow);
+
+ mlx5_esw_put(priv->mdev);
+ return 0;
+
+errout:
+ rcu_read_unlock();
+ return err;
+}
+
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
+ struct flow_cls_offload *f, unsigned long flags)
+{
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5_eswitch *peer_esw;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ u64 lastuse = 0;
+ u64 packets = 0;
+ u64 bytes = 0;
+ int err = 0;
+
+ rcu_read_lock();
+ flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
+ tc_ht_params));
+ rcu_read_unlock();
+ if (IS_ERR(flow))
+ return PTR_ERR(flow);
+
+ if (!same_flow_direction(flow, flags)) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
+ counter = mlx5e_tc_get_counter(flow);
+ if (!counter)
+ goto errout;
+
+ mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+ }
+
+ /* Under multipath it's possible for one rule to be currently
+ * un-offloaded while the other rule is offloaded.
+ */
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ goto out;
+
+ if (flow_flag_test(flow, DUP) &&
+ flow_flag_test(flow->peer_flow, OFFLOADED)) {
+ u64 bytes2;
+ u64 packets2;
+ u64 lastuse2;
+
+ counter = mlx5e_tc_get_counter(flow->peer_flow);
+ if (!counter)
+ goto no_peer_counter;
+ mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+ bytes += bytes2;
+ packets += packets2;
+ lastuse = max_t(u64, lastuse, lastuse2);
+ }
+
+no_peer_counter:
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+out:
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ trace_mlx5e_stats_flower(f);
+errout:
+ mlx5e_flow_put(priv, flow);
+ return err;
+}
+
+static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch *esw;
+ u32 rate_mbps = 0;
+ u16 vport_num;
+ int err;
+
+ vport_num = rpriv->rep->vport;
+ if (vport_num >= MLX5_VPORT_ECPF) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Ingress rate limit is supported only for Eswitch ports connected to VFs");
+ return -EOPNOTSUPP;
+ }
+
+ esw = priv->mdev->priv.eswitch;
+ /* The rate is given in bytes/sec.
+ * First convert it to bits/sec and then round to the nearest Mbit/sec
+ * (million bits per second). If the rate is non-zero, configure at
+ * least 1 Mbit/sec.
+ */
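+ /* Worked example of this conversion (illustrative values only,
+ * integer division truncates):
+ *   rate = 1250000 bytes/sec -> 10000000 bits/sec,
+ *   (10000000 + 500000) / 1000000 = 10 -> rate_mbps = 10;
+ *   rate = 50000 bytes/sec -> 400000 bits/sec,
+ *   (400000 + 500000) / 1000000 = 0 -> clamped to rate_mbps = 1.
+ */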
+ if (rate) {
+ rate = (rate * BITS_PER_BYTE) + 500000;
+ do_div(rate, 1000000);
+ rate_mbps = max_t(u32, rate, 1);
+ }
+
+ err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
+
+ return err;
+}
+
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when exceed action is not drop");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
+ !flow_action_is_last_entry(action, act)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is ok, but action is not last");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.peakrate_bytes_ps ||
+ act->police.avrate || act->police.overhead) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when peakrate/avrate/overhead is configured");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ const struct flow_action_entry *act;
+ int err;
+ int i;
+
+ if (!flow_action_has_entries(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
+ return -EINVAL;
+ }
+
+ if (!flow_offload_has_one_action(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
+ return -EOPNOTSUPP;
+ }
+
+ if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_POLICE:
+ if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is not continue");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx5e_policer_validate(flow_action, act, extack);
+ if (err)
+ return err;
+
+ err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
+ if (err)
+ return err;
+
+ rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct netlink_ext_ack *extack = ma->common.extack;
+
+ if (ma->common.prio != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
+ return -EINVAL;
+ }
+
+ return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
+}
+
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct netlink_ext_ack *extack = ma->common.extack;
+
+ return apply_police_params(priv, 0, extack);
+}
+
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct rtnl_link_stats64 cur_stats;
+ u64 dbytes;
+ u64 dpkts;
+
+ cur_stats = priv->stats.vf_vport;
+ dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
+ dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
+ rpriv->prev_vf_vport_stats = cur_stats;
+ flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
+ FLOW_ACTION_HW_STATS_DELAYED);
+}
+
+static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
+ struct mlx5e_priv *peer_priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
+ struct mlx5e_hairpin_entry *hpe, *tmp;
+ LIST_HEAD(init_wait_list);
+ u16 peer_vhca_id;
+ int bkt;
+
+ if (!mlx5e_same_hw_devs(priv, peer_priv))
+ return;
+
+ peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
+
+ mutex_lock(&tc->hairpin_tbl_lock);
+ hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
+ if (refcount_inc_not_zero(&hpe->refcnt))
+ list_add(&hpe->dead_peer_wait_list, &init_wait_list);
+ mutex_unlock(&tc->hairpin_tbl_lock);
+
+ list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
+ wait_for_completion(&hpe->res_ready);
+ if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
+ mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
+
+ mlx5e_hairpin_put(priv, hpe);
+ }
+}
+
+static int mlx5e_tc_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_priv *peer_priv;
+ struct mlx5e_tc_table *tc;
+ struct mlx5e_priv *priv;
+
+ if (ndev->netdev_ops != &mlx5e_netdev_ops ||
+ event != NETDEV_UNREGISTER ||
+ ndev->reg_state == NETREG_REGISTERED)
+ return NOTIFY_DONE;
+
+ tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
+ priv = tc->priv;
+ peer_priv = netdev_priv(ndev);
+ if (priv == peer_priv ||
+ !(priv->netdev->features & NETIF_F_HW_TC))
+ return NOTIFY_DONE;
+
+ mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
+
+ return NOTIFY_DONE;
+}
+
+static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
+{
+ int tc_grp_size, tc_tbl_size;
+ u32 max_flow_counter;
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
+
+ tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
+ BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
+
+ return tc_tbl_size;
+}
+
+static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_flow_table **ft = &tc->miss_t;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
+ ft_attr.max_fte = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.level = MLX5E_TC_MISS_LEVEL;
+ ft_attr.prio = 0;
+ ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
+
+ *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(*ft)) {
+ err = PTR_ERR(*ft);
+ netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
+ }
+
+ return err;
+}
+
+static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+
+ mlx5_destroy_flow_table(tc->miss_t);
+}
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mapping_ctx *chains_mapping;
+ struct mlx5_chains_attr attr = {};
+ u64 mapping_id;
+ int err;
+
+ mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
+ mutex_init(&tc->t_lock);
+ mutex_init(&tc->hairpin_tbl_lock);
+ hash_init(tc->hairpin_tbl);
+ tc->priv = priv;
+
+ err = rhashtable_init(&tc->ht, &tc_ht_params);
+ if (err)
+ return err;
+
+ lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
+ lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
+
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
+ if (IS_ERR(chains_mapping)) {
+ err = PTR_ERR(chains_mapping);
+ goto err_mapping;
+ }
+ tc->mapping = chains_mapping;
+
+ err = mlx5e_tc_nic_create_miss_table(priv);
+ if (err)
+ goto err_chains;
+
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
+ attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+ attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
+ attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
+ attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
+ attr.default_ft = tc->miss_t;
+ attr.mapping = chains_mapping;
+
+ tc->chains = mlx5_chains_create(dev, &attr);
+ if (IS_ERR(tc->chains)) {
+ err = PTR_ERR(tc->chains);
+ goto err_miss;
+ }
+
+ tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
+ tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
+ MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
+
+ tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
+ err = register_netdevice_notifier_dev_net(priv->netdev,
+ &tc->netdevice_nb,
+ &tc->netdevice_nn);
+ if (err) {
+ tc->netdevice_nb.notifier_call = NULL;
+ mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
+ goto err_reg;
+ }
+
+ return 0;
+
+err_reg:
+ mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
+ mlx5_chains_destroy(tc->chains);
+err_miss:
+ mlx5e_tc_nic_destroy_miss_table(priv);
+err_chains:
+ mapping_destroy(chains_mapping);
+err_mapping:
+ rhashtable_destroy(&tc->ht);
+ return err;
+}
+
+static void _mlx5e_tc_del_flow(void *ptr, void *arg)
+{
+ struct mlx5e_tc_flow *flow = ptr;
+ struct mlx5e_priv *priv = flow->priv;
+
+ mlx5e_tc_del_flow(priv, flow);
+ kfree(flow);
+}
+
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+
+ if (tc->netdevice_nb.notifier_call)
+ unregister_netdevice_notifier_dev_net(priv->netdev,
+ &tc->netdevice_nb,
+ &tc->netdevice_nn);
+
+ mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
+ mutex_destroy(&tc->hairpin_tbl_lock);
+
+ rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
+
+ if (!IS_ERR_OR_NULL(tc->t)) {
+ mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
+ tc->t = NULL;
+ }
+ mutex_destroy(&tc->t_lock);
+
+ mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
+ mapping_destroy(tc->mapping);
+ mlx5_chains_destroy(tc->chains);
+ mlx5e_tc_nic_destroy_miss_table(priv);
+}
+
+int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
+{
+ int err;
+
+ err = rhashtable_init(tc_ht, &tc_ht_params);
+ if (err)
+ return err;
+
+ lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
+ lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
+
+ return 0;
+}
+
+void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
+{
+ rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+}
+
+int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+ const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
+ struct mlx5e_rep_priv *rpriv;
+ struct mapping_ctx *mapping;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ u64 mapping_id;
+ int err = 0;
+
+ rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+ priv = netdev_priv(rpriv->netdev);
+ esw = priv->mdev->priv.eswitch;
+
+ uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
+ MLX5_FLOW_NAMESPACE_FDB);
+ uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
+ esw_chains(esw),
+ &esw->offloads.mod_hdr,
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
+
+ uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
+
+ uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
+
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+ sizeof(struct tunnel_match_key),
+ TUNNEL_INFO_BITS_MASK, true);
+
+ if (IS_ERR(mapping)) {
+ err = PTR_ERR(mapping);
+ goto err_tun_mapping;
+ }
+ uplink_priv->tunnel_mapping = mapping;
+
+ /* The last two values are reserved for the stack devices' slow path
+ * table mark and the bridge ingress push mark.
+ */
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
+ if (IS_ERR(mapping)) {
+ err = PTR_ERR(mapping);
+ goto err_enc_opts_mapping;
+ }
+ uplink_priv->tunnel_enc_opts_mapping = mapping;
+
+ uplink_priv->encap = mlx5e_tc_tun_init(priv);
+ if (IS_ERR(uplink_priv->encap)) {
+ err = PTR_ERR(uplink_priv->encap);
+ goto err_register_fib_notifier;
+ }
+
+ mlx5_esw_offloads_devcom_init(esw);
+
+ return 0;
+
+err_register_fib_notifier:
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+err_enc_opts_mapping:
+ mapping_destroy(uplink_priv->tunnel_mapping);
+err_tun_mapping:
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+ mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ netdev_warn(priv->netdev,
+ "Failed to initialize tc (eswitch), err: %d", err);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
+ return err;
+}
+
+void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+
+ rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+ priv = netdev_priv(rpriv->netdev);
+ esw = priv->mdev->priv.eswitch;
+
+ mlx5_esw_offloads_devcom_cleanup(esw);
+
+ mlx5e_tc_tun_cleanup(uplink_priv->encap);
+
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+ mapping_destroy(uplink_priv->tunnel_mapping);
+
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+ mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
+}
+
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+
+ return atomic_read(&tc_ht->nelems);
+}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+}
+
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
+{
+ struct mlx5_rep_uplink_priv *rpriv =
+ container_of(work, struct mlx5_rep_uplink_priv,
+ reoffload_flows_work);
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ mutex_lock(&rpriv->unready_flows_lock);
+ list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
+ if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
+ unready_flow_del(flow);
+ }
+ mutex_unlock(&rpriv->unready_flows_lock);
+}
+
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower,
+ unsigned long flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS);
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!priv->netdev || !netif_device_present(priv->netdev))
+ return -EOPNOTSUPP;
+
+ if (mlx5e_is_uplink_rep(priv))
+ flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
+ else
+ flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain = 0, chain_tag, reg_b, zone_restore_id;
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
+ struct mlx5_mapped_obj mapped_obj;
+ struct tc_skb_ext *tc_skb_ext;
+ struct mlx5e_tc_table *tc;
+ int err;
+
+ reg_b = be32_to_cpu(cqe->ft_metadata);
+ tc = mlx5e_fs_get_tc(priv->fs);
+ chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
+
+ err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find chain for chain tag: %d, err: %d\n",
+ chain_tag, err);
+ return false;
+ }
+
+ if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+ chain = mapped_obj.chain;
+ tc_skb_ext = tc_skb_ext_alloc(skb);
+ if (WARN_ON(!tc_skb_ext))
+ return false;
+
+ tc_skb_ext->chain = chain;
+
+ zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
+ ESW_ZONE_ID_MASK;
+
+ if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
+ zone_restore_id))
+ return false;
+ } else {
+ netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
+ return false;
+ }
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
new file mode 100644
index 000000000..edd5f0944
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_EN_TC_H__
+#define __MLX5_EN_TC_H__
+
+#include <net/pkt_cls.h>
+#include "en.h"
+#include "eswitch.h"
+#include "en/tc_ct.h"
+#include "en/tc_tun.h"
+#include "en/tc/int_port.h"
+#include "en/tc/meter.h"
+#include "en_rep.h"
+
+#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+#define NIC_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\
+ sizeof(struct mlx5_nic_flow_attr))
+#define ESW_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\
+ sizeof(struct mlx5_esw_flow_attr))
+#define ns_to_attr_sz(ns) (((ns) == MLX5_FLOW_NAMESPACE_FDB) ?\
+ ESW_FLOW_ATTR_SZ :\
+ NIC_FLOW_ATTR_SZ)
+
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
+
+struct mlx5e_tc_update_priv {
+ struct net_device *fwd_dev;
+};
+
+struct mlx5_nic_flow_attr {
+ u32 flow_tag;
+ u32 hairpin_tirn;
+ struct mlx5_flow_table *hairpin_ft;
+};
+
+struct mlx5_flow_attr {
+ u32 action;
+ struct mlx5_fc *counter;
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_ct_attr ct_attr;
+ struct mlx5e_sample_attr sample_attr;
+ struct mlx5e_meter_attr meter_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ u32 chain;
+ u16 prio;
+ u32 dest_chain;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_table *dest_ft;
+ u8 inner_match_level;
+ u8 outer_match_level;
+ u8 tun_ip_version;
+ int tunnel_id; /* mapped tunnel id */
+ u32 flags;
+ u32 exe_aso_type;
+ struct list_head list;
+ struct mlx5e_post_act_handle *post_act_handle;
+ struct {
+ /* Indicate whether the parsed flow should be counted for lag mode decision
+ * making
+ */
+ bool count;
+ } lag;
+ /* keep this union last */
+ union {
+ struct mlx5_esw_flow_attr esw_attr[0];
+ struct mlx5_nic_flow_attr nic_attr[0];
+ };
+};
+
+enum {
+ MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0),
+ MLX5_ATTR_FLAG_SLOW_PATH = BIT(1),
+ MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2),
+ MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3),
+ MLX5_ATTR_FLAG_SAMPLE = BIT(4),
+ MLX5_ATTR_FLAG_ACCEPT = BIT(5),
+ MLX5_ATTR_FLAG_CT = BIT(6),
+};
+
+/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
+static inline bool
+mlx5e_tc_attr_flags_skip(u32 attr_flags)
+{
+ return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT);
+}
+
+struct mlx5_rx_tun_attr {
+ u16 decap_vport;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } src_ip; /* Valid if decap_vport is not zero */
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip; /* Valid if decap_vport is not zero */
+};
+
+#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16
+#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0)
+
+#define MLX5E_TC_MAX_INT_PORT_NUM (8)
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+struct tunnel_match_key {
+ struct flow_dissector_key_control enc_control;
+ struct flow_dissector_key_keyid enc_key_id;
+ struct flow_dissector_key_ports enc_tp;
+ struct flow_dissector_key_ip enc_ip;
+ union {
+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
+ };
+
+ int filter_ifindex;
+};
+
+struct tunnel_match_enc_opts {
+ struct flow_dissector_key_enc_opts key;
+ struct flow_dissector_key_enc_opts mask;
+};
+
+/* The tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS wide:
+ * the upper TUNNEL_INFO_BITS carry general tunnel info and the
+ * lower ENC_OPTS_BITS carry enc_opts.
+ */
+#define TUNNEL_INFO_BITS 12
+#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
+#define ENC_OPTS_BITS 11
+#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
+#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
+#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
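+
+/* For illustration, a tunnel_id composed from these fields is laid out as
+ * (tunnel_info_id << ENC_OPTS_BITS) | enc_opts_id, i.e. for the 23-bit id,
+ * bits [22:11] hold the tunnel info index and bits [10:0] the enc_opts index.
+ */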
+
+enum {
+ MLX5E_TC_FLAG_INGRESS_BIT,
+ MLX5E_TC_FLAG_EGRESS_BIT,
+ MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+ MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+ MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
+ MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
+};
+
+#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)
+
+int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv);
+void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv);
+
+int mlx5e_tc_ht_init(struct rhashtable *tc_ht);
+void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht);
+
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
+ struct flow_cls_offload *f, unsigned long flags);
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
+ struct flow_cls_offload *f, unsigned long flags);
+
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
+ struct flow_cls_offload *f, unsigned long flags);
+
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *f);
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *f);
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma);
+
+struct mlx5e_encap_entry;
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list);
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list);
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e);
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e);
+
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list);
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
+
+struct mlx5e_neigh_hash_entry;
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e);
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+
+enum mlx5e_tc_attr_to_reg {
+ CHAIN_TO_REG,
+ VPORT_TO_REG,
+ TUNNEL_TO_REG,
+ CTSTATE_TO_REG,
+ ZONE_TO_REG,
+ ZONE_RESTORE_TO_REG,
+ MARK_TO_REG,
+ LABELS_TO_REG,
+ FTEID_TO_REG,
+ NIC_CHAIN_TO_REG,
+ NIC_ZONE_RESTORE_TO_REG,
+ PACKET_COLOR_TO_REG,
+};
+
+struct mlx5e_tc_attr_to_reg_mapping {
+ int mfield; /* rewrite field */
+ int moffset; /* bit offset of mfield */
+ int mlen; /* bits to rewrite/match */
+
+ int soffset; /* byte offset of spec for match */
+};
+
+extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[];
+
+#define MLX5_REG_MAPPING_MOFFSET(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].moffset)
+#define MLX5_REG_MAPPING_MBITS(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].mlen)
+#define MLX5_REG_MAPPING_MASK(reg_id) (GENMASK(mlx5e_tc_attr_to_reg_mappings[reg_id].mlen - 1, 0))
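+
+/* Typical (illustrative) use of these helpers to extract a mapped value
+ * from a copy of the metadata register:
+ *   val = (reg >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
+ *         MLX5_REG_MAPPING_MASK(NIC_ZONE_RESTORE_TO_REG);
+ */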
+
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev);
+
+int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data);
+
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5e_tc_attr_to_reg type,
+ int act_id, u32 data);
+
+void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data,
+ u32 mask);
+
+void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 *data,
+ u32 *mask);
+
+int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data);
+
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+ struct flow_match_basic *match, bool outer,
+ void *headers_c, void *headers_v);
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv);
+
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5_flow_handle *
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev);
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev,
+ u16 *vport);
+
+int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type,
+ u32 *action,
+ int out_index);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int mlx5e_tc_ht_init(struct rhashtable *tc_ht) { return 0; }
+static inline void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) {}
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5_flow_attr *mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type);
+
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+#else /* CONFIG_MLX5_ESWITCH */
+static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv,
+ unsigned long flags)
+{
+ return 0;
+}
+
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
+#endif
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+struct mlx5e_tc_table *mlx5e_tc_table_alloc(void);
+void mlx5e_tc_table_free(struct mlx5e_tc_table *tc);
+static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain, reg_b;
+
+ reg_b = be32_to_cpu(cqe->ft_metadata);
+
+ if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ESW_ZONE_ID_BITS))
+ return false;
+
+ chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
+ if (chain)
+ return true;
+#endif
+
+ return false;
+}
+
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; }
+static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {}
+static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
+{ return false; }
+static inline bool
+mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
+{ return true; }
+#endif
+
+#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
new file mode 100644
index 000000000..a6d7e2cfc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -0,0 +1,1058 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/geneve.h>
+#include <net/dsfield.h>
+#include "en.h"
+#include "en/txrx.h"
+#include "ipoib/ipoib.h"
+#include "en_accel/en_accel.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/macsec.h"
+#include "en/ptp.h"
+#include <net/ipv6.h>
+
+static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
+{
+ int i;
+
+ for (i = 0; i < num_dma; i++) {
+ struct mlx5e_sq_dma *last_pushed_dma =
+ mlx5e_dma_get(sq, --sq->dma_fifo_pc);
+
+ mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
+ }
+}
+
+static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
+{
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+
+ return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
+}
+
+static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
+{
+ if (skb_transport_header_was_set(skb))
+ return skb_transport_offset(skb);
+ else
+ return mlx5e_skb_l2_header_offset(skb);
+}
+
+static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
+ struct sk_buff *skb)
+{
+ u16 hlen;
+
+ switch (mode) {
+ case MLX5_INLINE_MODE_NONE:
+ return 0;
+ case MLX5_INLINE_MODE_TCP_UDP:
+ hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
+ if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
+ hlen += VLAN_HLEN;
+ break;
+ case MLX5_INLINE_MODE_IP:
+ hlen = mlx5e_skb_l3_header_offset(skb);
+ break;
+ case MLX5_INLINE_MODE_L2:
+ default:
+ hlen = mlx5e_skb_l2_header_offset(skb);
+ }
+ return min_t(u16, hlen, skb_headlen(skb));
+}
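+
+/* For example, with MLX5_INLINE_MODE_TCP_UDP on an untagged TCP/IPv4 frame,
+ * eth_get_headlen() typically returns the combined Ethernet, IP and TCP
+ * header length, so the full L2..L4 headers are inlined (capped at
+ * skb_headlen() above).
+ */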
+
+#define MLX5_UNSAFE_MEMCPY_DISCLAIMER \
+ "This copy has been bounds-checked earlier in " \
+ "mlx5i_sq_calc_wqe_attr() and intentionally " \
+ "crosses a flex array boundary. Since it is " \
+ "performance sensitive, splitting the copy is " \
+ "undesirable."
+
+static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
+{
+ struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
+ int cpy1_sz = 2 * ETH_ALEN;
+ int cpy2_sz = ihs - cpy1_sz;
+
+ memcpy(&vhdr->addrs, skb->data, cpy1_sz);
+ vhdr->h_vlan_proto = skb->vlan_proto;
+ vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
+ unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto,
+ skb->data + cpy1_sz,
+ cpy2_sz,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+}
+
+static inline void
+mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg)))
+ return;
+
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ if (skb->encapsulation) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ sq->stats->csum_partial_inner++;
+ } else {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+ }
+#ifdef CONFIG_MLX5_EN_TLS
+ } else if (unlikely(accel && accel->tls.tls_tisn)) {
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+#endif
+ } else
+ sq->stats->csum_none++;
+}
+
+/* Returns the number of header bytes that we plan
+ * to inline later in the transmit descriptor
+ */
+static inline u16
+mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ u16 ihs;
+
+ *hopbyhop = 0;
+ if (skb->encapsulation) {
+ ihs = skb_inner_tcp_all_headers(skb);
+ stats->tso_inner_packets++;
+ stats->tso_inner_bytes += skb->len - ihs;
+ } else {
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
+ } else {
+ ihs = skb_tcp_all_headers(skb);
+ if (ipv6_has_hopopt_jumbo(skb)) {
+ *hopbyhop = sizeof(struct hop_jumbo_hdr);
+ ihs -= sizeof(struct hop_jumbo_hdr);
+ }
+ }
+ stats->tso_packets++;
+ stats->tso_bytes += skb->len - ihs - *hopbyhop;
+ }
+
+ return ihs;
+}
+
+static inline int
+mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ unsigned char *skb_data, u16 headlen,
+ struct mlx5_wqe_data_seg *dseg)
+{
+ dma_addr_t dma_addr = 0;
+ u8 num_dma = 0;
+ int i;
+
+ if (headlen) {
+ dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ goto dma_unmap_wqe_err;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(headlen);
+
+ mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
+ num_dma++;
+ dseg++;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ int fsz = skb_frag_size(frag);
+
+ dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ goto dma_unmap_wqe_err;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(fsz);
+
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
+ num_dma++;
+ dseg++;
+ }
+
+ return num_dma;
+
+dma_unmap_wqe_err:
+ mlx5e_dma_unmap_wqe_err(sq, num_dma);
+ return -ENOMEM;
+}
+
+struct mlx5e_tx_attr {
+ u32 num_bytes;
+ u16 headlen;
+ u16 ihs;
+ __be16 mss;
+ u16 insz;
+ u8 opcode;
+ u8 hopbyhop;
+};
+
+struct mlx5e_tx_wqe_attr {
+ u16 ds_cnt;
+ u16 ds_cnt_inl;
+ u16 ds_cnt_ids;
+ u8 num_wqebbs;
+};
+
+static u8
+mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel)
+{
+ u8 mode;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ if (accel && accel->tls.tls_tisn)
+ return MLX5_INLINE_MODE_TCP_UDP;
+#endif
+
+ mode = sq->min_inline_mode;
+
+ if (skb_vlan_tag_present(skb) &&
+ test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
+ mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
+
+ return mode;
+}
+
+static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel,
+ struct mlx5e_tx_attr *attr)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+
+ if (skb_is_gso(skb)) {
+ int hopbyhop;
+ u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_LSO,
+ .mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
+ .ihs = ihs,
+ .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
+ .headlen = skb_headlen(skb) - ihs - hopbyhop,
+ .hopbyhop = hopbyhop,
+ };
+
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
+ u16 ihs = mlx5e_calc_min_inline(mode, skb);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_SEND,
+ .mss = cpu_to_be16(0),
+ .ihs = ihs,
+ .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
+ .headlen = skb_headlen(skb) - ihs,
+ };
+
+ stats->packets++;
+ }
+
+ attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
+ stats->bytes += attr->num_bytes;
+}
+
+static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
+{
+ u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
+ u16 ds_cnt_inl = 0;
+ u16 ds_cnt_ids = 0;
+
+ /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */
+
+ if (attr->insz)
+ ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
+ MLX5_SEND_WQE_DS);
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ if (skb_vlan_tag_present(skb))
+ inl += VLAN_HLEN;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS))
+ netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl,
+ (u16)MLX5E_MAX_TX_INLINE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .ds_cnt_ids = ds_cnt_ids,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
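+
+/* Rough example of the DS accounting above, assuming an empty WQE of two
+ * data segments (ctrl + eth) and four DS per WQEBB: a non-GSO skb with a
+ * linear headlen, two frags and no inline/ids data yields
+ * ds_cnt = 2 + 1 + 2 = 5 and num_wqebbs = DIV_ROUND_UP(5, 4) = 2.
+ */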
+
+static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+}
+
+static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+}
+
+static void mlx5e_tx_flush(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ /* Must not be called when an MPWQE session is active but empty. */
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+
+ wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
+static inline void
+mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr,
+ const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
+ struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
+ bool xmit_more)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ bool send_doorbell;
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = skb,
+ .num_bytes = attr->num_bytes,
+ .num_dma = num_dma,
+ .num_wqebbs = wqe_attr->num_wqebbs,
+ .num_fifo_pkts = 0,
+ };
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);
+
+ mlx5e_tx_skb_update_hwts_flags(skb);
+
+ sq->pc += wi->num_wqebbs;
+
+ mlx5e_tx_check_stop(sq);
+
+ if (unlikely(sq->ptpsq)) {
+ mlx5e_skb_cb_hwtstamp_init(skb);
+ mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
+ if (!netif_tx_queue_stopped(sq->txq) &&
+ !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+ skb_get(skb);
+ }
+
+ send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
+ if (send_doorbell)
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
+}
+
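+/* Build and post a regular SEND/LSO WQE: fill the ctrl and eth segments
+ * (inlining headers, stripping the IPv6 Hop-by-Hop jumbo header when
+ * present, or requesting HW VLAN insertion as needed) and DMA-map the rest
+ * of the skb into data segments.
+ */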
+static void
+mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
+ struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe_info *wi;
+ u16 ihs = attr->ihs;
+ struct ipv6hdr *h6;
+ struct mlx5e_sq_stats *stats = sq->stats;
+ int num_dma;
+
+ stats->xmit_more += xmit_more;
+
+ /* fill wqe */
+ wi = &sq->db.wqe_info[pi];
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ eseg->mss = attr->mss;
+
+ if (ihs) {
+ u8 *start = eseg->inline_hdr.start;
+
+ if (unlikely(attr->hopbyhop)) {
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ if (skb_vlan_tag_present(skb)) {
+ mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6));
+ ihs += VLAN_HLEN;
+ h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr));
+ } else {
+ unsafe_memcpy(start, skb->data,
+ ETH_HLEN + sizeof(*h6),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ h6 = (struct ipv6hdr *)(start + ETH_HLEN);
+ }
+ h6->nexthdr = IPPROTO_TCP;
+ /* Copy the TCP header after the IPv6 one */
+ memcpy(h6 + 1,
+ skb->data + ETH_HLEN + sizeof(*h6) +
+ sizeof(struct hop_jumbo_hdr),
+ tcp_hdrlen(skb));
+ /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
+ } else if (skb_vlan_tag_present(skb)) {
+ mlx5e_insert_vlan(start, skb, ihs);
+ ihs += VLAN_HLEN;
+ stats->added_vlan_packets++;
+ } else {
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ attr->ihs,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ }
+ eseg->inline_hdr.sz |= cpu_to_be16(ihs);
+ dseg += wqe_attr->ds_cnt_inl;
+ } else if (skb_vlan_tag_present(skb)) {
+ eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
+ if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
+ eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
+ eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
+ stats->added_vlan_packets++;
+ }
+
+ dseg += wqe_attr->ds_cnt_ids;
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop,
+ attr->headlen, dseg);
+ if (unlikely(num_dma < 0))
+ goto err_drop;
+
+ mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);
+
+ return;
+
+err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
+}
+
+static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
+{
+ return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
+ !attr->insz && !mlx5e_macsec_skb_is_offload(skb);
+}
+
+static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+
+ /* Assumes the session is already running and has at least one packet. */
+ return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+}
+
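+/* Open a new enhanced multi-packet WQE (MPWQE) session: reserve room for the
+ * largest allowed MPWQE and copy in the eth segment that every packet added
+ * to the session must match.
+ */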
+static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ net_prefetchw(wqe->data);
+
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = 0,
+ };
+
+ memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+
+ sq->stats->mpwqe_blks++;
+}
+
+static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
+{
+ return sq->mpwqe.wqe;
+}
+
+static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg;
+
+ dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+
+ session->pkt_count++;
+ session->bytes_count += txd->len;
+
+ dseg->addr = cpu_to_be64(txd->dma_addr);
+ dseg->byte_count = cpu_to_be32(txd->len);
+ dseg->lkey = sq->mkey_be;
+ session->ds_count++;
+
+ sq->stats->mpwqe_pkts++;
+}
+
+static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ u8 ds_count = session->ds_count;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_tx_wqe_info *wi;
+ u16 pi;
+
+ cseg = &session->wqe->ctrl;
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = NULL,
+ .num_bytes = session->bytes_count,
+ .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
+ .num_dma = session->pkt_count,
+ .num_fifo_pkts = session->pkt_count,
+ };
+
+ sq->pc += wi->num_wqebbs;
+
+ session->wqe = NULL;
+
+ mlx5e_tx_check_stop(sq);
+
+ return cseg;
+}
+
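+/* Transmit a linear skb as a single data segment of an MPWQE session,
+ * starting or restarting the session as needed and closing it once it is
+ * full or a doorbell is required.
+ */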
+static void
+mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_xmit_data txd;
+
+ txd.data = skb->data;
+ txd.len = skb->len;
+
+ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+ goto err_unmap;
+
+ if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
+ mlx5e_tx_mpwqe_session_complete(sq);
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ }
+
+ sq->stats->xmit_more += xmit_more;
+
+ mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
+ mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
+ mlx5e_tx_mpwqe_add_dseg(sq, &txd);
+ mlx5e_tx_skb_update_hwts_flags(skb);
+
+ if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
+ /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
+ /* Might stop the queue, but we were asked to ring the doorbell anyway. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ }
+
+ return;
+
+err_unmap:
+ mlx5e_dma_unmap_wqe_err(sq, 1);
+ sq->stats->dropped++;
+ dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
+}
+
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
+{
+ /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
+ if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
+ mlx5e_tx_mpwqe_session_complete(sq);
+}
+
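+/* For PTP SQs, record the (masked) skb FIFO producer counter in the eth
+ * segment metadata of packets that request a HW timestamp, so the completion
+ * can later be matched to the right skb.
+ */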
+static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ if (ptpsq->ts_cqe_ctr_mask && unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ eseg->flow_table_metadata = cpu_to_be32(ptpsq->skb_fifo_pc &
+ ptpsq->ts_cqe_ctr_mask);
+}
+
+static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
+ struct mlx5_wqe_eth_seg *eseg, u16 ihs)
+{
+ mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
+ mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
+ if (unlikely(sq->ptpsq))
+ mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg);
+}
+
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_accel_tx_state accel = {};
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
+ struct mlx5e_tx_wqe *wqe;
+ struct mlx5e_txqsq *sq;
+ u16 pi;
+
+ /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
+ * queue being changed is disabled, and smp_wmb guarantees that the
+ * changes are visible before mlx5e_xmit tries to read from txq2sq. It
+ * guarantees that the value of txq2sq[qid] doesn't change while
+ * mlx5e_xmit is running on queue number qid. smp_wmb() is paired with
+ * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
+ */
+ sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+ if (unlikely(!sq)) {
+ /* Two cases when sq can be NULL:
+ * 1. The HTB node is registered, and mlx5e_select_queue
+ * selected its queue ID, but the SQ itself is not yet created.
+ * 2. HTB SQ creation failed. Similar to the previous case, but
+ * the SQ won't be created.
+ */
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* May send SKBs and WQEs. */
+ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
+ return NETDEV_TX_OK;
+
+ mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);
+
+ if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
+ if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
+ struct mlx5_wqe_eth_seg eseg = {};
+
+ mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs);
+ mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
+ return NETDEV_TX_OK;
+ }
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+ }
+
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+
+ /* May update the WQE, but may not post other WQEs. */
+ mlx5e_accel_tx_finish(sq, wqe, &accel,
+ (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
+ mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs);
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
+
+ return NETDEV_TX_OK;
+}
+
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
+{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth);
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
+}
+
+static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ int i;
+
+ for (i = 0; i < wi->num_dma; i++) {
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+}
+
+static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ struct skb_shared_hwtstamps hwts = {};
+ u64 ts = get_cqe_ts(cqe);
+
+ hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
+ if (sq->ptpsq)
+ mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
+ hwts.hwtstamp, sq->ptpsq->cq_stats);
+ else
+ skb_tstamp_tx(skb, &hwts);
+ }
+
+ napi_consume_skb(skb, napi_budget);
+}
+
+static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo);
+
+ mlx5e_consume_skb(sq, skb, cqe, napi_budget);
+ }
+}
+
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq)
+{
+ if (netif_tx_queue_stopped(sq->txq) &&
+ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+ mlx5e_ptpsq_fifo_has_room(sq) &&
+ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+ netif_tx_wake_queue(sq->txq);
+ sq->stats->wake++;
+ }
+}
+
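+/* Poll the TX completion queue: for each completed WQE, unmap its DMA
+ * buffers and free or timestamp its skb(s), then update BQL and wake the
+ * txq if room was freed. Returns true if the poll budget was exhausted.
+ */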
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
+{
+ struct mlx5e_sq_stats *stats;
+ struct mlx5e_txqsq *sq;
+ struct mlx5_cqe64 *cqe;
+ u32 dma_fifo_cc;
+ u32 nbytes;
+ u16 npkts;
+ u16 sqcc;
+ int i;
+
+ sq = container_of(cq, struct mlx5e_txqsq, cq);
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return false;
+
+ stats = sq->stats;
+
+ npkts = 0;
+ nbytes = 0;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ /* avoid dirtying sq cache line every cqe */
+ dma_fifo_cc = sq->dma_fifo_cc;
+
+ i = 0;
+ do {
+ struct mlx5e_tx_wqe_info *wi;
+ u16 wqe_counter;
+ bool last_wqe;
+ u16 ci;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ do {
+ last_wqe = (sqcc == wqe_counter);
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+
+ sqcc += wi->num_wqebbs;
+
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
+
+ npkts++;
+ nbytes += wi->num_bytes;
+ continue;
+ }
+
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
+ &dma_fifo_cc)))
+ continue;
+
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
+
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
+ } while (!last_wqe);
+
+ if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
+ if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
+ &sq->state)) {
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+ (struct mlx5_err_cqe *)cqe);
+ mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+ queue_work(cq->priv->wq, &sq->recover_work);
+ }
+ stats->cqe_err++;
+ }
+
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ stats->cqes += i;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->dma_fifo_cc = dma_fifo_cc;
+ sq->cc = sqcc;
+
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+
+ mlx5e_txqsq_wake(sq);
+
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++)
+ dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo));
+}
+
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ u32 dma_fifo_cc, nbytes = 0;
+ u16 ci, sqcc, npkts = 0;
+
+ sqcc = sq->cc;
+ dma_fifo_cc = sq->dma_fifo_cc;
+
+ while (sqcc != sq->pc) {
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+
+ sqcc += wi->num_wqebbs;
+
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ dev_kfree_skb_any(wi->skb);
+
+ npkts++;
+ nbytes += wi->num_bytes;
+ continue;
+ }
+
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
+ continue;
+
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
+
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
+ }
+
+ sq->dma_fifo_cc = dma_fifo_cc;
+ sq->cc = sqcc;
+
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+}
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+static inline void
+mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
+ struct mlx5_wqe_datagram_seg *dseg)
+{
+ memcpy(&dseg->av, av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
+}
+
+static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
+{
+ u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
+ u16 ds_cnt_inl = 0;
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
+
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
+
+void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
+{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
+ struct mlx5i_tx_wqe *wqe;
+
+ struct mlx5_wqe_datagram_seg *datagram;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe_info *wi;
+
+ struct mlx5e_sq_stats *stats = sq->stats;
+ int num_dma;
+ u16 pi;
+
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
+
+ stats->xmit_more += xmit_more;
+
+ /* fill wqe */
+ wi = &sq->db.wqe_info[pi];
+ cseg = &wqe->ctrl;
+ datagram = &wqe->datagram;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);
+
+ mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg);
+
+ eseg->mss = attr.mss;
+
+ if (attr.ihs) {
+ if (unlikely(attr.hopbyhop)) {
+ struct ipv6hdr *h6;
+
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ ETH_HLEN + sizeof(*h6),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN);
+ h6->nexthdr = IPPROTO_TCP;
+ /* Copy the TCP header after the IPv6 one */
+ unsafe_memcpy(h6 + 1,
+ skb->data + ETH_HLEN + sizeof(*h6) +
+ sizeof(struct hop_jumbo_hdr),
+ tcp_hdrlen(skb),
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
+ } else {
+ unsafe_memcpy(eseg->inline_hdr.start, skb->data,
+ attr.ihs,
+ MLX5_UNSAFE_MEMCPY_DISCLAIMER);
+ }
+ eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
+ dseg += wqe_attr.ds_cnt_inl;
+ }
+
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop,
+ attr.headlen, dseg);
+ if (unlikely(num_dma < 0))
+ goto err_drop;
+
+ mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);
+
+ return;
+
+err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
new file mode 100644
index 000000000..44547b22a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/irq.h>
+#include <net/xdp_sock_drv.h>
+#include "en.h"
+#include "en/txrx.h"
+#include "en/xdp.h"
+#include "en/xsk/rx.h"
+#include "en/xsk/tx.h"
+#include "en_accel/ktls_txrx.h"
+
+static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
+{
+ int current_cpu = smp_processor_id();
+
+ return cpumask_test_cpu(current_cpu, c->aff_mask);
+}
+
+static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct dim_sample dim_sample = {};
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
+ return;
+
+ dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
+ net_dim(&sq->dim, dim_sample);
+}
+
+static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct dim_sample dim_sample = {};
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
+ return;
+
+ dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
+ net_dim(&rq->dim, dim_sample);
+}
+
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_wqe *nopwqe;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_NOP,
+ .num_wqebbs = 1,
+ };
+
+ nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
+}
+
+static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq)
+{
+ bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool);
+ bool busy_xsk = false, xsk_rx_alloc_err;
+
+ /* If SQ is empty, there are no TX completions to trigger NAPI, so set
+ * need_wakeup. Do it before queuing packets for TX to avoid race
+ * condition with userspace.
+ */
+ if (need_wakeup && xsksq->pc == xsksq->cc)
+ xsk_set_tx_need_wakeup(xsksq->xsk_pool);
+ busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
+ /* If we queued some packets for TX, no need for wakeup anymore. */
+ if (need_wakeup && xsksq->pc != xsksq->cc)
+ xsk_clear_tx_need_wakeup(xsksq->xsk_pool);
+
+ /* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it
+ * before refilling to avoid race condition with userspace.
+ */
+ if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq))
+ xsk_set_rx_need_wakeup(xskrq->xsk_pool);
+ xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ xskrq);
+ /* Ask for wakeup if WQ is not full after refill. */
+ if (!need_wakeup)
+ busy_xsk |= xsk_rx_alloc_err;
+ else if (xsk_rx_alloc_err)
+ xsk_set_rx_need_wakeup(xskrq->xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(xskrq->xsk_pool);
+
+ return busy_xsk;
+}
+
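+/* Per-channel NAPI handler: poll the TX, QoS, XDP and XSK completion queues,
+ * refill the RX queues, and once no work remains complete NAPI and re-arm
+ * all of the channel's CQs.
+ */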
+int mlx5e_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
+ napi);
+ struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_xdpsq *xsksq = &c->xsksq;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_rq *xskrq = &c->xskrq;
+ struct mlx5e_rq *rq = &c->rq;
+ bool aff_change = false;
+ bool busy_xsk = false;
+ bool busy = false;
+ int work_done = 0;
+ u16 qos_sqs_size;
+ bool xsk_open;
+ int i;
+
+ rcu_read_lock();
+
+ qos_sqs = rcu_dereference(c->qos_sqs);
+
+ xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ ch_stats->poll++;
+
+ for (i = 0; i < c->num_tc; i++)
+ busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
+
+ if (unlikely(qos_sqs)) {
+ smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */
+ qos_sqs_size = READ_ONCE(c->qos_sqs_size);
+
+ for (i = 0; i < qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
+
+ if (sq)
+ busy |= mlx5e_poll_tx_cq(&sq->cq, budget);
+ }
+ }
+
+ /* budget=0 means we may be in IRQ context, do as little as possible */
+ if (unlikely(!budget))
+ goto out;
+
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+
+ if (c->xdp)
+ busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
+
+ if (xsk_open)
+ work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+
+ if (likely(budget - work_done))
+ work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+
+ busy |= work_done == budget;
+
+ mlx5e_poll_ico_cq(&c->icosq.cq);
+ if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
+ /* Don't clear the flag if nothing was polled to prevent
+ * queueing more WQEs and overflowing the async ICOSQ.
+ */
+ clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state);
+
+ /* Keep after async ICOSQ CQ poll */
+ if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget)))
+ busy |= mlx5e_ktls_rx_handle_resync_list(c, budget);
+
+ busy |= INDIRECT_CALL_2(rq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ rq);
+ if (xsk_open) {
+ busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
+ busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq);
+ }
+
+ busy |= busy_xsk;
+
+ if (busy) {
+ if (likely(mlx5e_channel_no_affinity_change(c))) {
+ work_done = budget;
+ goto out;
+ }
+ ch_stats->aff_change++;
+ aff_change = true;
+ if (budget && work_done == budget)
+ work_done--;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ goto out;
+
+ ch_stats->arm++;
+
+ for (i = 0; i < c->num_tc; i++) {
+ mlx5e_handle_tx_dim(&c->sq[i]);
+ mlx5e_cq_arm(&c->sq[i].cq);
+ }
+ if (unlikely(qos_sqs)) {
+ for (i = 0; i < qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
+
+ if (sq) {
+ mlx5e_handle_tx_dim(sq);
+ mlx5e_cq_arm(&sq->cq);
+ }
+ }
+ }
+
+ mlx5e_handle_rx_dim(rq);
+
+ mlx5e_cq_arm(&rq->cq);
+ mlx5e_cq_arm(&c->icosq.cq);
+ mlx5e_cq_arm(&c->async_icosq.cq);
+ mlx5e_cq_arm(&c->xdpsq.cq);
+
+ if (xsk_open) {
+ mlx5e_handle_rx_dim(xskrq);
+ mlx5e_cq_arm(&xsksq->cq);
+ mlx5e_cq_arm(&xskrq->cq);
+ }
+
+ if (unlikely(aff_change && busy_xsk)) {
+ mlx5e_trigger_irq(&c->icosq);
+ ch_stats->force_irq++;
+ }
+
+out:
+ rcu_read_unlock();
+
+ return work_done;
+}
+
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+
+ napi_schedule(cq->napi);
+ cq->event_ctr++;
+ cq->ch_stats->events++;
+}
+
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+ struct net_device *netdev = cq->netdev;
+
+ netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n",
+ __func__, mcq->cqn, event);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
new file mode 100644
index 000000000..e112b5685
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -0,0 +1,1150 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eq.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "fpga/core.h"
+#include "eswitch.h"
+#include "lib/clock.h"
+#include "diag/fw_tracer.h"
+#include "mlx5_irq.h"
+#include "devlink.h"
+
+enum {
+ MLX5_EQE_OWNER_INIT_VAL = 0x1,
+};
+
+enum {
+ MLX5_EQ_STATE_ARMED = 0x9,
+ MLX5_EQ_STATE_FIRED = 0xa,
+ MLX5_EQ_STATE_ALWAYS_ARMED = 0xb,
+};
+
+enum {
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
+};
+
+/* The budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we
+ * update the ci before we have polled all the entries in the EQ.
+ * MLX5_NUM_SPARE_EQE is used to set the EQ size, so the budget must be
+ * smaller than the EQ size.
+ */
+enum {
+ MLX5_EQ_POLLING_BUDGET = 128,
+};
+
+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
+struct mlx5_eq_table {
+ struct list_head comp_eqs_list;
+ struct mlx5_eq_async pages_eq;
+ struct mlx5_eq_async cmd_eq;
+ struct mlx5_eq_async async_eq;
+
+ struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+ /* Since CQ DB is stored in async_eq */
+ struct mlx5_nb cq_err_nb;
+
+ struct mutex lock; /* sync async eqs creations */
+ int num_comp_eqs;
+ struct mlx5_irq_table *irq_table;
+ struct mlx5_irq **comp_irqs;
+ struct mlx5_irq *ctrl_irq;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
+};
+
+#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
+ (1ull << MLX5_EVENT_TYPE_COMM_EST) | \
+ (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \
+ (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
+
+static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};
+
+ MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+ MLX5_SET(destroy_eq_in, in, eq_number, eqn);
+ return mlx5_cmd_exec_in(dev, destroy_eq, in);
+}
+
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
+
+ rcu_read_lock();
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ rcu_read_unlock();
+
+ return cq;
+}
+
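+/* Completion EQ handler: for each EQE look up the reported CQ and invoke its
+ * completion callback, then update the EQ consumer index and schedule the
+ * per-EQ completion tasklet.
+ */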
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+ __always_unused unsigned long action,
+ __always_unused void *data)
+{
+ struct mlx5_eq_comp *eq_comp =
+ container_of(nb, struct mlx5_eq_comp, irq_nb);
+ struct mlx5_eq *eq = &eq_comp->core;
+ struct mlx5_eqe *eqe;
+ int num_eqes = 0;
+ u32 cqn = -1;
+
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
+ struct mlx5_core_cq *cq;
+
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ dma_rmb();
+ /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+ cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (likely(cq)) {
+ ++cq->arm_sn;
+ cq->comp(cq, eqe);
+ mlx5_cq_put(cq);
+ } else {
+ dev_dbg_ratelimited(eq->dev->device,
+ "Completion event for bogus CQ 0x%x\n", cqn);
+ }
+
+ ++eq->cons_index;
+
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+
+out:
+ eq_update_ci(eq, 1);
+
+ if (cqn != -1)
+ tasklet_schedule(&eq_comp->tasklet_ctx.task);
+
+ return 0;
+}
+
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them. It is not recommended to use it, unless this is the last
+ * resort.
+ */
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
+{
+ u32 count_eqe;
+
+ disable_irq(eq->core.irqn);
+ count_eqe = eq->core.cons_index;
+ mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
+ count_eqe = eq->core.cons_index - count_eqe;
+ enable_irq(eq->core.irqn);
+
+ return count_eqe;
+}
+
+static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery,
+ unsigned long *flags)
+ __acquires(&eq->lock)
+{
+ if (!recovery)
+ spin_lock(&eq->lock);
+ else
+ spin_lock_irqsave(&eq->lock, *flags);
+}
+
+static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery,
+ unsigned long *flags)
+ __releases(&eq->lock)
+{
+ if (!recovery)
+ spin_unlock(&eq->lock);
+ else
+ spin_unlock_irqrestore(&eq->lock, *flags);
+}
+
+enum async_eq_nb_action {
+ ASYNC_EQ_IRQ_HANDLER = 0,
+ ASYNC_EQ_RECOVER = 1,
+};
+
+static int mlx5_eq_async_int(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct mlx5_eq_async *eq_async =
+ container_of(nb, struct mlx5_eq_async, irq_nb);
+ struct mlx5_eq *eq = &eq_async->core;
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_dev *dev;
+ struct mlx5_eqe *eqe;
+ unsigned long flags;
+ int num_eqes = 0;
+ bool recovery;
+
+ dev = eq->dev;
+ eqt = dev->priv.eq_table;
+
+ recovery = action == ASYNC_EQ_RECOVER;
+ mlx5_eq_async_int_lock(eq_async, recovery, &flags);
+
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
+ /*
+ * Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ dma_rmb();
+
+ atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+ atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
+
+ ++eq->cons_index;
+
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+
+out:
+ eq_update_ci(eq, 1);
+ mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
+
+ return unlikely(recovery) ? num_eqes : 0;
+}
+
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
+ int eqes;
+
+ eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
+ if (eqes)
+ mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
+}
+
+static void init_eq_buf(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe;
+ int i;
+
+ for (i = 0; i < eq_get_size(eq); i++) {
+ eqe = get_eqe(eq, i);
+ eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
+ }
+}
+
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct mlx5_eq_param *param)
+{
+ u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
+ struct mlx5_cq_table *cq_table = &eq->cq_table;
+ u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
+ u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
+ struct mlx5_priv *priv = &dev->priv;
+ __be64 *pas;
+ u16 vecidx;
+ void *eqc;
+ int inlen;
+ u32 *in;
+ int err;
+ int i;
+
+ /* Init CQ table */
+ memset(cq_table, 0, sizeof(*cq_table));
+ spin_lock_init(&cq_table->lock);
+ INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
+ eq->cons_index = 0;
+
+ err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
+ &eq->frag_buf, dev->priv.numa_node);
+ if (err)
+ return err;
+
+ mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
+ init_eq_buf(eq);
+
+ eq->irq = param->irq;
+ vecidx = mlx5_irq_get_index(eq->irq);
+
+ inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
+ MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_buf;
+ }
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
+ mlx5_fill_page_frag_array(&eq->frag_buf, pas);
+
+ MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
+ if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
+ MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+
+ for (i = 0; i < 4; i++)
+ MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
+ param->mask[i]);
+
+ eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
+ MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
+ MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
+ MLX5_SET(eqc, eqc, intr, vecidx);
+ MLX5_SET(eqc, eqc, log_page_size,
+ eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err)
+ goto err_in;
+
+ eq->vecidx = vecidx;
+ eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
+ eq->irqn = pci_irq_vector(dev->pdev, vecidx);
+ eq->dev = dev;
+ eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
+
+ err = mlx5_debug_eq_add(dev, eq);
+ if (err)
+ goto err_eq;
+
+ kvfree(in);
+ return 0;
+
+err_eq:
+ mlx5_cmd_destroy_eq(dev, eq->eqn);
+
+err_in:
+ kvfree(in);
+
+err_buf:
+ mlx5_frag_buf_free(dev, &eq->frag_buf);
+ return err;
+}
+
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev : Device which owns the eq
+ * @eq : EQ to enable
+ * @nb : Notifier call block
+ *
+ * Must be called after EQ is created in device.
+ *
+ * Return: 0 if no error
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
+{
+ int err;
+
+ err = mlx5_irq_attach_nb(eq->irq, nb);
+ if (!err)
+ eq_update_ci(eq, 1);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
+
+/**
+ * mlx5_eq_disable - Disable EQ for receiving EQEs
+ * @dev : Device which owns the eq
+ * @eq : EQ to disable
+ * @nb : Notifier call block
+ *
+ * Must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
+{
+ mlx5_irq_detach_nb(eq->irq, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
+
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ mlx5_debug_eq_remove(dev, eq);
+
+ err = mlx5_cmd_destroy_eq(dev, eq->eqn);
+ if (err)
+ mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
+ eq->eqn);
+
+ mlx5_frag_buf_free(dev, &eq->frag_buf);
+ return err;
+}
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ int err;
+
+ spin_lock(&table->lock);
+ err = radix_tree_insert(&table->tree, cq->cqn, cq);
+ spin_unlock(&table->lock);
+
+ return err;
+}
+
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *tmp;
+
+ spin_lock(&table->lock);
+ tmp = radix_tree_delete(&table->tree, cq->cqn);
+ spin_unlock(&table->lock);
+
+ if (!tmp) {
+ mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
+ cq->cqn, eq->eqn);
+ return;
+ }
+
+ if (tmp != cq)
+ mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
+ cq->cqn, eq->eqn);
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *eq_table;
+ int i;
+
+ eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL,
+ dev->priv.numa_node);
+ if (!eq_table)
+ return -ENOMEM;
+
+ dev->priv.eq_table = eq_table;
+
+ mlx5_eq_debugfs_init(dev);
+
+ mutex_init(&eq_table->lock);
+ for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+ ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+
+ eq_table->irq_table = mlx5_irq_table_get(dev);
+ return 0;
+}
+
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
+{
+ mlx5_eq_debugfs_cleanup(dev);
+ kvfree(dev->priv.eq_table);
+}
+
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev,
+ struct mlx5_eq *eq, struct mlx5_eq_param *param)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ mutex_lock(&eq_table->lock);
+ err = create_map_eq(dev, eq, param);
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ mutex_lock(&eq_table->lock);
+ err = destroy_unmap_eq(dev, eq);
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int cq_err_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_cq *cq;
+ struct mlx5_eqe *eqe;
+ struct mlx5_eq *eq;
+ u32 cqn;
+
+ /* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+ eq = &eqt->async_eq.core;
+ eqe = data;
+
+ cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+ cqn, eqe->data.cq_err.syndrome);
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return NOTIFY_OK;
+ }
+
+ if (cq->event)
+ cq->event(cq, type);
+
+ mlx5_cq_put(cq);
+
+ return NOTIFY_OK;
+}
+
+static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
+{
+ __be64 *user_unaffiliated_events;
+ __be64 *user_affiliated_events;
+ int i;
+
+ user_affiliated_events =
+ MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
+ user_unaffiliated_events =
+ MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);
+
+ for (i = 0; i < 4; i++)
+ mask[i] |= be64_to_cpu(user_affiliated_events[i] |
+ user_unaffiliated_events[i]);
+}
+
+static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
+{
+ u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+
+ if (MLX5_VPORT_MANAGER(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
+
+ if (MLX5_CAP_GEN(dev, general_notification_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
+
+ if (MLX5_CAP_GEN(dev, port_module_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
+ else
+ mlx5_core_dbg(dev, "port_module_event is not set\n");
+
+ if (MLX5_PPS_CAP(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
+
+ if (MLX5_CAP_GEN(dev, fpga))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
+ (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
+
+ if (MLX5_CAP_GEN(dev, temp_warn_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
+
+ if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
+
+ if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
+
+ if (mlx5_eswitch_is_funcs_handler(dev))
+ async_event_mask |=
+ (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
+
+ if (MLX5_CAP_GEN_MAX(dev, vhca_state))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE);
+
+ if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE);
+
+ mask[0] = async_event_mask;
+
+ if (MLX5_CAP_GEN(dev, event_cap))
+ gather_user_async_events(dev, mask);
+}
+
+static int
+setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
+ struct mlx5_eq_param *param, const char *name)
+{
+ int err;
+
+ eq->irq_nb.notifier_call = mlx5_eq_async_int;
+ spin_lock_init(&eq->lock);
+
+ err = create_async_eq(dev, &eq->core, param);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
+ return err;
+ }
+ err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
+ destroy_async_eq(dev, &eq->core);
+ }
+ return err;
+}
+
+static void cleanup_async_eq(struct mlx5_core_dev *dev,
+ struct mlx5_eq_async *eq, const char *name)
+{
+ int err;
+
+ mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
+ err = destroy_async_eq(dev, &eq->core);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
+ name, err);
+}
+
+static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_NUM_ASYNC_EQE;
+}
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_param param = {};
+ int err;
+
+ /* All the async_eqs use a single IRQ; request one IRQ and share its
+ * index among all the async_eqs of this device.
+ */
+ table->ctrl_irq = mlx5_ctrl_irq_request(dev);
+ if (IS_ERR(table->ctrl_irq))
+ return PTR_ERR(table->ctrl_irq);
+
+ MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+ mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+ param = (struct mlx5_eq_param) {
+ .irq = table->ctrl_irq,
+ .nent = MLX5_NUM_CMD_EQE,
+ .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
+ };
+ mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
+ err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
+ if (err)
+ goto err1;
+
+ mlx5_cmd_use_events(dev);
+ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
+
+ param = (struct mlx5_eq_param) {
+ .irq = table->ctrl_irq,
+ .nent = async_eq_depth_devlink_param_get(dev),
+ };
+
+ gather_async_events_mask(dev, param.mask);
+ err = setup_async_eq(dev, &table->async_eq, &param, "async");
+ if (err)
+ goto err2;
+
+ param = (struct mlx5_eq_param) {
+ .irq = table->ctrl_irq,
+ .nent = /* TODO: sriov max_vf + */ 1,
+ .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
+ };
+
+ err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
+ if (err)
+ goto err3;
+
+ return 0;
+
+err3:
+ cleanup_async_eq(dev, &table->async_eq, "async");
+err2:
+ mlx5_cmd_use_polling(dev);
+ cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+err1:
+ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
+ return err;
+}
+
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ cleanup_async_eq(dev, &table->pages_eq, "pages");
+ cleanup_async_eq(dev, &table->async_eq, "async");
+ mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
+ mlx5_cmd_use_polling(dev);
+ cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
+}
+
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
+{
+ return &dev->priv.eq_table->async_eq.core;
+}
+
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
+ struct mlx5_eq_param *param)
+{
+ struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL,
+ dev->priv.numa_node);
+ int err;
+
+ if (!eq)
+ return ERR_PTR(-ENOMEM);
+
+ param->irq = dev->priv.eq_table->ctrl_irq;
+ err = create_async_eq(dev, eq, param);
+ if (err) {
+ kvfree(eq);
+ eq = ERR_PTR(err);
+ }
+
+ return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (IS_ERR(eq))
+ return -EINVAL;
+
+ err = destroy_async_eq(dev, eq);
+ if (err)
+ goto out;
+
+ kvfree(eq);
+out:
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+ u32 ci = eq->cons_index + cc;
+ u32 nent = eq_get_size(eq);
+ struct mlx5_eqe *eqe;
+
+ eqe = get_eqe(eq, ci & (nent - 1));
+ eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ if (eqe)
+ dma_rmb();
+
+ return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val;
+
+ eq->cons_index += cc;
+ val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ wmb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
+
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ if (mlx5_core_is_sf(dev))
+ mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+ else
+ mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
+ kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs = table->num_comp_eqs;
+ u16 *cpus;
+ int ret;
+ int i;
+
+ ncomp_eqs = table->num_comp_eqs;
+ table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+ if (!table->comp_irqs)
+ return -ENOMEM;
+ if (mlx5_core_is_sf(dev)) {
+ ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+ }
+
+ cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
+ if (!cpus) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
+ for (i = 0; i < ncomp_eqs; i++)
+ cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ kfree(cpus);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+
+free_irqs:
+ kfree(table->comp_irqs);
+ return ret;
+}
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ list_del(&eq->list);
+ mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
+ if (destroy_unmap_eq(dev, &eq->core))
+ mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+ eq->core.eqn);
+ tasklet_disable(&eq->tasklet_ctx.task);
+ kfree(eq);
+ }
+ comp_irqs_release(dev);
+}
+
+static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
+ return MLX5_COMP_EQ_SIZE;
+}
+
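+/* Create one completion EQ per successfully requested IRQ vector, each with
+ * its own tasklet context, and keep them on an ordered list so that
+ * mlx5_vector2eqn() can map vector numbers to EQs.
+ */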
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq;
+ int ncomp_eqs;
+ int nent;
+ int err;
+ int i;
+
+ ncomp_eqs = comp_irqs_request(dev);
+ if (ncomp_eqs < 0)
+ return ncomp_eqs;
+ INIT_LIST_HEAD(&table->comp_eqs_list);
+ nent = comp_eq_depth_devlink_param_get(dev);
+
+ for (i = 0; i < ncomp_eqs; i++) {
+ struct mlx5_eq_param param = {};
+
+ eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);
+
+ eq->irq_nb.notifier_call = mlx5_eq_comp_int;
+ param = (struct mlx5_eq_param) {
+ .irq = table->comp_irqs[i],
+ .nent = nent,
+ };
+
+ err = create_map_eq(dev, &eq->core, &param);
+ if (err)
+ goto clean_eq;
+ err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
+ if (err) {
+ destroy_unmap_eq(dev, &eq->core);
+ goto clean_eq;
+ }
+
+ mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+ /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+ list_add_tail(&eq->list, &table->comp_eqs_list);
+ }
+
+ table->num_comp_eqs = ncomp_eqs;
+ return 0;
+
+clean_eq:
+ kfree(eq);
+clean:
+ destroy_comp_eqs(dev);
+ return err;
+}
+
+static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int err = -ENOENT;
+ int i = 0;
+
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector) {
+ if (irqn)
+ *irqn = eq->core.irqn;
+ if (eqn)
+ *eqn = eq->core.eqn;
+ err = 0;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
+{
+ return vector2eqnirqn(dev, vector, eqn, NULL);
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
+{
+ return vector2eqnirqn(dev, vector, NULL, irqn);
+}
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+ return dev->priv.eq_table->num_comp_eqs;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int i = 0;
+
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector)
+ break;
+ }
+
+ return mlx5_irq_get_affinity_mask(eq->core.irq);
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+ return dev->priv.eq_table->rmap;
+}
+#endif
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq;
+
+ list_for_each_entry(eq, &table->comp_eqs_list, list) {
+ if (eq->core.eqn == eqn)
+ return eq;
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static void clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+ free_irq_cpu_rmap(eq_table->rmap);
+#endif
+}
+
+static int set_rmap(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+ int vecidx;
+
+ eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+ if (!eq_table->rmap) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+ goto err_out;
+ }
+
+ for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
+ err = irq_cpu_rmap_add(eq_table->rmap,
+ pci_irq_vector(mdev->pdev, vecidx));
+ if (err) {
+ mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+ err);
+ goto err_irq_cpu_rmap_add;
+ }
+ }
+ return 0;
+
+err_irq_cpu_rmap_add:
+ clear_rmap(mdev);
+err_out:
+#endif
+ return err;
+}
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
+ mlx5_irq_table_free_irqs(dev);
+ mutex_unlock(&table->lock);
+}
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+#define MLX5_MAX_ASYNC_EQS 4
+#else
+#define MLX5_MAX_ASYNC_EQS 3
+#endif
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int max_eqs_sf;
+ int err;
+
+ eq_table->num_comp_eqs =
+ min_t(int,
+ mlx5_irq_table_get_num_comp(eq_table->irq_table),
+ num_eqs - MLX5_MAX_ASYNC_EQS);
+ if (mlx5_core_is_sf(dev)) {
+ max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
+ mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
+ eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
+ max_eqs_sf);
+ }
+
+ err = create_async_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create async EQs\n");
+ goto err_async_eqs;
+ }
+
+ if (!mlx5_core_is_sf(dev)) {
+ /* rmap is a mapping between irq number and queue number.
+ * each irq can be assigned only to a single rmap.
+ * since SFs share IRQs, rmap mapping cannot function correctly
+ * for irqs that are shared for different core/netdev RX rings.
+ * Hence we don't allow netdev rmap for SFs
+ */
+ err = set_rmap(dev);
+ if (err)
+ goto err_rmap;
+ }
+
+ err = create_comp_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create completion EQs\n");
+ goto err_comp_eqs;
+ }
+
+ return 0;
+err_comp_eqs:
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
+err_rmap:
+ destroy_async_eqs(dev);
+err_async_eqs:
+ return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
+ destroy_comp_eqs(dev);
+ destroy_async_eqs(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+EXPORT_SYMBOL(mlx5_eq_notifier_register);
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
+}
+EXPORT_SYMBOL(mlx5_eq_notifier_unregister);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile
new file mode 100644
index 000000000..c78512eed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
new file mode 100644
index 000000000..6b4c9ffad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_egress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+ esw_acl_egress_vlan_destroy(vport);
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) {
+ mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
+ vport->egress.legacy.drop_rule = NULL;
+ }
+}
+
+static int esw_acl_egress_lgcy_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_group *drop_grp;
+ u32 *flow_group_in;
+ int err = 0;
+
+ err = esw_acl_egress_vlan_grp_create(esw, vport);
+ if (err)
+ return err;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ drop_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(drop_grp)) {
+ err = PTR_ERR(drop_grp);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto drop_grp_err;
+ }
+
+ vport->egress.legacy.drop_grp = drop_grp;
+ kvfree(flow_group_in);
+ return 0;
+
+drop_grp_err:
+ kvfree(flow_group_in);
+alloc_err:
+ esw_acl_egress_vlan_grp_destroy(vport);
+ return err;
+}
+
+static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_grp)) {
+ mlx5_destroy_flow_group(vport->egress.legacy.drop_grp);
+ vport->egress.legacy.drop_grp = NULL;
+ }
+ esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_fc *drop_counter = NULL;
+ struct mlx5_flow_act flow_act = {};
+	/* The egress acl table contains 2 rules:
+	 * 1) Allow traffic with vlan_tag=vst_vlan_id.
+	 * 2) Drop all other traffic.
+	 */
+ int table_size = 2;
+ int dest_num = 0;
+ int actions_flag;
+ int err = 0;
+
+ if (vport->egress.legacy.drop_counter) {
+ drop_counter = vport->egress.legacy.drop_counter;
+ } else if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) {
+ drop_counter = mlx5_fc_create(esw->dev, false);
+ if (IS_ERR(drop_counter)) {
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule counter err(%ld)\n",
+ vport->vport, PTR_ERR(drop_counter));
+ drop_counter = NULL;
+ }
+ vport->egress.legacy.drop_counter = drop_counter;
+ }
+
+ esw_acl_egress_lgcy_rules_destroy(vport);
+
+ if (!vport->info.vlan && !vport->info.qos) {
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ return 0;
+ }
+
+ if (!vport->egress.acl) {
+ vport->egress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ table_size);
+ if (IS_ERR(vport->egress.acl)) {
+ err = PTR_ERR(vport->egress.acl);
+ vport->egress.acl = NULL;
+ goto out;
+ }
+
+ err = esw_acl_egress_lgcy_groups_create(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ /* Allowed vlan rule */
+ actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ if (vst_mode_steering)
+ actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
+ actions_flag);
+ if (err)
+ goto out;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /* Attach egress drop flow counter */
+ if (drop_counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter);
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->egress.legacy.drop_rule =
+ mlx5_add_flow_rules(vport->egress.acl, NULL,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->egress.legacy.drop_rule)) {
+ err = PTR_ERR(vport->egress.legacy.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.legacy.drop_rule = NULL;
+ goto out;
+ }
+
+ return err;
+
+out:
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ return err;
+}
+
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ goto clean_drop_counter;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
+
+ esw_acl_egress_lgcy_rules_destroy(vport);
+ esw_acl_egress_lgcy_groups_destroy(vport);
+ esw_acl_egress_table_destroy(vport);
+
+clean_drop_counter:
+ if (vport->egress.legacy.drop_counter) {
+ mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
+ vport->egress.legacy.drop_counter = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
new file mode 100644
index 000000000..2e504c746
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.fwd_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.fwd_rule);
+ vport->egress.offloads.fwd_rule = NULL;
+}
+
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.bounce_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+ vport->egress.offloads.bounce_rule = NULL;
+}
+
+static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest)
+{
+ struct mlx5_flow_act flow_act = {};
+ int err = 0;
+
+ esw_debug(esw->dev, "vport(%d) configure egress acl rule fwd2vport(%d)\n",
+ vport->vport, fwd_dest->vport.num);
+
+ /* Delete the old egress forward-to-vport rule if any */
+ esw_acl_egress_ofld_fwd2vport_destroy(vport);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ vport->egress.offloads.fwd_rule =
+ mlx5_add_flow_rules(vport->egress.acl, NULL,
+ &flow_act, fwd_dest, 1);
+ if (IS_ERR(vport->egress.offloads.fwd_rule)) {
+ err = PTR_ERR(vport->egress.offloads.fwd_rule);
+ esw_warn(esw->dev,
+ "vport(%d) failed to add fwd2vport acl rule err(%d)\n",
+ vport->vport, err);
+ vport->egress.offloads.fwd_rule = NULL;
+ }
+
+ return err;
+}
+
+static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest)
+{
+ int err = 0;
+ int action;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+		/* For prio tag mode, there is only 1 FTE:
+		 * 1) prio tag packets - pop the prio tag VLAN, allow.
+		 * Unmatched traffic is allowed by default.
+		 */
+ esw_debug(esw->dev,
+ "vport[%d] configure prio tag egress rules\n", vport->vport);
+
+ action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ action |= fwd_dest ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ /* prio tag vlan rule - pop it so vport receives untagged packets */
+ err = esw_egress_acl_vlan_create(esw, vport, fwd_dest, 0, action);
+ if (err)
+ goto prio_err;
+ }
+
+ if (fwd_dest) {
+ err = esw_acl_egress_ofld_fwd2vport_create(esw, vport, fwd_dest);
+ if (err)
+ goto fwd_err;
+ }
+
+ return 0;
+
+fwd_err:
+ esw_acl_egress_vlan_destroy(vport);
+prio_err:
+ return err;
+}
+
+static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
+{
+ esw_acl_egress_vlan_destroy(vport);
+ esw_acl_egress_ofld_fwd2vport_destroy(vport);
+ esw_acl_egress_ofld_bounce_rule_destroy(vport);
+}
+
+static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fwd_grp;
+ u32 *flow_group_in;
+ u32 flow_index = 0;
+ int ret = 0;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+ ret = esw_acl_egress_vlan_grp_create(esw, vport);
+ if (ret)
+ return ret;
+
+ flow_index++;
+ }
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(esw))
+ goto out;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ ret = -ENOMEM;
+ goto fwd_grp_err;
+ }
+
+	/* This group holds 1 FTE that forwards all packets to the other
+	 * vport when vport bonding is supported.
+	 */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+ fwd_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(fwd_grp)) {
+ ret = PTR_ERR(fwd_grp);
+ esw_warn(esw->dev,
+ "Failed to create vport[%d] egress fwd2vport flow group, err(%d)\n",
+ vport->vport, ret);
+ kvfree(flow_group_in);
+ goto fwd_grp_err;
+ }
+ vport->egress.offloads.fwd_grp = fwd_grp;
+ kvfree(flow_group_in);
+ return 0;
+
+fwd_grp_err:
+ esw_acl_egress_vlan_grp_destroy(vport);
+out:
+ return ret;
+}
+
+static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.fwd_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
+ vport->egress.offloads.fwd_grp = NULL;
+ }
+
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+ vport->egress.offloads.bounce_grp = NULL;
+ }
+
+ esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num);
+}
+
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int table_size = 0;
+ int err;
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(esw) &&
+ !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
+ if (!esw_acl_egress_needed(esw, vport->vport))
+ return 0;
+
+ esw_acl_egress_ofld_rules_destroy(vport);
+
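+	/* One FTE for the fwd2vport rule and one for the prio-tag rule,
+	 * depending on what is supported.
+	 */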
+ if (mlx5_esw_acl_egress_fwd2vport_supported(esw))
+ table_size++;
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ table_size++;
+ vport->egress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
+ if (IS_ERR(vport->egress.acl)) {
+ err = PTR_ERR(vport->egress.acl);
+ vport->egress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_egress_ofld_groups_create(esw, vport);
+ if (err)
+ goto group_err;
+
+ esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport);
+
+ err = esw_acl_egress_ofld_rules_create(esw, vport, NULL);
+ if (err)
+ goto rules_err;
+
+ return 0;
+
+rules_err:
+ esw_acl_egress_ofld_groups_destroy(vport);
+group_err:
+ esw_acl_egress_table_destroy(vport);
+ return err;
+}
+
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport)
+{
+ esw_acl_egress_ofld_rules_destroy(vport);
+ esw_acl_egress_ofld_groups_destroy(vport);
+ esw_acl_egress_table_destroy(vport);
+}
+
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+ u16 passive_vport_num)
+{
+ struct mlx5_vport *passive_vport = mlx5_eswitch_get_vport(esw, passive_vport_num);
+ struct mlx5_vport *active_vport = mlx5_eswitch_get_vport(esw, active_vport_num);
+ struct mlx5_flow_destination fwd_dest = {};
+
+ if (IS_ERR(active_vport))
+ return PTR_ERR(active_vport);
+ if (IS_ERR(passive_vport))
+ return PTR_ERR(passive_vport);
+
+ /* Cleanup and recreate rules WITHOUT fwd2vport of active vport */
+ esw_acl_egress_ofld_rules_destroy(active_vport);
+ esw_acl_egress_ofld_rules_create(esw, active_vport, NULL);
+
+ /* Cleanup and recreate all rules + fwd2vport rule of passive vport to forward */
+ esw_acl_egress_ofld_rules_destroy(passive_vport);
+ fwd_dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ fwd_dest.vport.num = active_vport_num;
+ fwd_dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ fwd_dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ return esw_acl_egress_ofld_rules_create(esw, passive_vport, &fwd_dest);
+}
+
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ esw_acl_egress_ofld_rules_destroy(vport);
+ return esw_acl_egress_ofld_rules_create(esw, vport, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
new file mode 100644
index 000000000..45b839116
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+
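+/* Create a per-vport ACL flow table with @size entries in the requested
+ * e-switch ACL namespace (@ns, ingress or egress). Returns the table or
+ * an ERR_PTR on failure.
+ */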
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *acl;
+ int acl_supported;
+ u16 vport_num;
+ int err;
+
+ acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ?
+ MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) :
+ MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support);
+
+ if (!acl_supported)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ vport_num = vport->vport;
+ esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
+ ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
+
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
+ vport_num);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ ft_attr.max_fte = size;
+ ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT;
+ acl = mlx5_create_vport_flow_table(root_ns, &ft_attr, vport_num);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ esw_warn(dev, "vport[%d] create %s ACL table, err(%d)\n", vport_num,
+ ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress", err);
+ }
+ return acl;
+}
+
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest,
+ u16 vlan_id, u32 flow_action)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ if (vport->egress.allowed_vlan)
+ return -EEXIST;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
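+	/* Match packets carrying a customer VLAN tag with the given VID */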
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = flow_action;
+ vport->egress.allowed_vlan =
+ mlx5_add_flow_rules(vport->egress.acl, spec,
+ &flow_act, fwd_dest, 0);
+ if (IS_ERR(vport->egress.allowed_vlan)) {
+ err = PTR_ERR(vport->egress.allowed_vlan);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress vlan rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.allowed_vlan = NULL;
+ }
+
+ kvfree(spec);
+ return err;
+}
+
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
+ mlx5_del_flow_rules(vport->egress.allowed_vlan);
+ vport->egress.allowed_vlan = NULL;
+ }
+}
+
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *vlan_grp;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int ret = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ vlan_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(vlan_grp)) {
+ ret = PTR_ERR(vlan_grp);
+ esw_warn(esw->dev,
+ "Failed to create E-Switch vport[%d] egress pop vlans flow group, err(%d)\n",
+ vport->vport, ret);
+ goto out;
+ }
+ vport->egress.vlan_grp = vlan_grp;
+
+out:
+ kvfree(flow_group_in);
+ return ret;
+}
+
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.vlan_grp)) {
+ mlx5_destroy_flow_group(vport->egress.vlan_grp);
+ vport->egress.vlan_grp = NULL;
+ }
+}
+
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ return;
+
+ mlx5_destroy_flow_table(vport->egress.acl);
+ vport->egress.acl = NULL;
+}
+
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->ingress.acl)
+ return;
+
+ mlx5_destroy_flow_table(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+}
+
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->ingress.allow_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.allow_rule);
+ vport->ingress.allow_rule = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
new file mode 100644
index 000000000..a47063fab
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_HELPER_H__
+#define __MLX5_ESWITCH_ACL_HELPER_H__
+
+#include "eswitch.h"
+
+/* General acl helper functions */
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size);
+
+/* Egress acl helper functions */
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport);
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest,
+ u16 vlan_id, u32 flow_action);
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport);
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport);
+
+/* Ingress acl helper functions */
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport);
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
new file mode 100644
index 000000000..093ed86a0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_ingress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.legacy.drop_rule) {
+ mlx5_del_flow_rules(vport->ingress.legacy.drop_rule);
+ vport->ingress.legacy.drop_rule = NULL;
+ }
+ esw_acl_ingress_allow_rule_destroy(vport);
+}
+
+static int esw_acl_ingress_lgcy_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
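+	/* Four single-FTE groups, in flow-index order: untagged + spoofchk,
+	 * untagged only, spoofchk only, and a final catch-all drop group.
+	 */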
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto spoof_err;
+ }
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+ vport->vport, err);
+ goto untagged_err;
+ }
+ vport->ingress.legacy.allow_untagged_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto allow_spoof_err;
+ }
+ vport->ingress.legacy.allow_spoofchk_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto drop_err;
+ }
+ vport->ingress.legacy.drop_grp = g;
+ kvfree(flow_group_in);
+ return 0;
+
+drop_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+ vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+ }
+allow_spoof_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+ vport->ingress.legacy.allow_untagged_only_grp = NULL;
+ }
+untagged_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+ }
+spoof_err:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.legacy.allow_spoofchk_only_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+ vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+ }
+ if (vport->ingress.legacy.allow_untagged_only_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+ vport->ingress.legacy.allow_untagged_only_grp = NULL;
+ }
+ if (vport->ingress.legacy.allow_untagged_spoofchk_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+ }
+ if (vport->ingress.legacy.drop_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp);
+ vport->ingress.legacy.drop_grp = NULL;
+ }
+}
+
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_fc *counter = NULL;
+ bool vst_check_cvlan = false;
+ bool vst_push_cvlan = false;
+	/* The ingress acl table contains 4 groups
+	 * (2 active rules at the same time -
+	 *      1 allow rule from one of the first 3 groups,
+	 *      1 drop rule from the last group):
+	 * 1) Allow untagged traffic with smac=original mac.
+	 * 2) Allow untagged traffic.
+	 * 3) Allow traffic with smac=original mac.
+	 * 4) Drop all other traffic.
+	 */
+ int table_size = 4;
+ int dest_num = 0;
+ int err = 0;
+ u8 *smac_v;
+
+ esw_acl_ingress_lgcy_rules_destroy(vport);
+
+ if (vport->ingress.legacy.drop_counter) {
+ counter = vport->ingress.legacy.drop_counter;
+ } else if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) {
+ counter = mlx5_fc_create(esw->dev, false);
+ if (IS_ERR(counter)) {
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule counter failed\n",
+ vport->vport);
+ counter = NULL;
+ }
+ vport->ingress.legacy.drop_counter = counter;
+ }
+
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ return 0;
+ }
+
+ if (!vport->ingress.acl) {
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ table_size);
+ if (IS_ERR(vport->ingress.acl)) {
+ err = PTR_ERR(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_ingress_lgcy_groups_create(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if ((vport->info.vlan || vport->info.qos)) {
+ if (vst_mode_steering)
+ vst_push_cvlan = true;
+ else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always))
+ vst_check_cvlan = true;
+ }
+
+ if (vst_check_cvlan || vport->info.spoofchk)
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ /* Create ingress allow rule */
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ if (vst_push_cvlan) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ flow_act.vlan[0].prio = vport->info.qos;
+ flow_act.vlan[0].vid = vport->info.vlan;
+ flow_act.vlan[0].ethtype = ETH_P_8021Q;
+ }
+
+ if (vst_check_cvlan)
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+
+ if (vport->info.spoofchk) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.smac_15_0);
+ smac_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, vport->info.mac);
+ }
+
+ vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ goto out;
+ }
+
+ if (!vst_check_cvlan && !vport->info.spoofchk)
+ goto out;
+
+ memset(&flow_act, 0, sizeof(flow_act));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ /* Attach drop flow counter */
+ if (counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter_id = mlx5_fc_id(counter);
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->ingress.legacy.drop_rule =
+ mlx5_add_flow_rules(vport->ingress.acl, NULL,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->ingress.legacy.drop_rule)) {
+ err = PTR_ERR(vport->ingress.legacy.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.legacy.drop_rule = NULL;
+ goto out;
+ }
+ kvfree(spec);
+ return 0;
+
+out:
+ if (err)
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ kvfree(spec);
+ return err;
+}
+
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->ingress.acl))
+ goto clean_drop_counter;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
+
+ esw_acl_ingress_lgcy_rules_destroy(vport);
+ esw_acl_ingress_lgcy_groups_destroy(vport);
+ esw_acl_ingress_table_destroy(vport);
+
+clean_drop_counter:
+ if (vport->ingress.legacy.drop_counter) {
+ mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
+ vport->ingress.legacy.drop_counter = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
new file mode 100644
index 000000000..db578a7e7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static bool
+esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw,
+ const struct mlx5_vport *vport)
+{
+ return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ mlx5_eswitch_is_vf_vport(esw, vport->vport));
+}
+
+static int esw_acl_ingress_prio_tag_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+	/* For prio tag mode, there is only 1 FTE:
+	 * 1) Untagged packets - push prio tag VLAN and modify metadata if
+	 *    required, allow.
+	 * Unmatched traffic is allowed by default.
+	 */
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Untagged packets - push prio tag VLAN, allow */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.vlan[0].ethtype = ETH_P_8021Q;
+ flow_act.vlan[0].vid = 0;
+ flow_act.vlan[0].prio = 0;
+
+ if (vport->ingress.offloads.modify_metadata_rule) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+ }
+
+ vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress untagged allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ }
+
+ kvfree(spec);
+ return err;
+}
+
+static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_flow_act flow_act = {};
+ int err = 0;
+ u32 key;
+
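+	/* Build a modify-header action that writes this vport's source-port
+	 * metadata into metadata register C0 at the source-port offset.
+	 */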
+ key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport);
+ key >>= ESW_SOURCE_PORT_METADATA_OFFSET;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+ MLX5_SET(set_action_in, action, data, key);
+ MLX5_SET(set_action_in, action, offset,
+ ESW_SOURCE_PORT_METADATA_OFFSET);
+ MLX5_SET(set_action_in, action, length,
+ ESW_SOURCE_PORT_METADATA_BITS);
+
+ vport->ingress.offloads.modify_metadata =
+ mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action);
+ if (IS_ERR(vport->ingress.offloads.modify_metadata)) {
+ err = PTR_ERR(vport->ingress.offloads.modify_metadata);
+ esw_warn(esw->dev,
+ "failed to alloc modify header for vport %d ingress acl (%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+ flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp;
+ vport->ingress.offloads.modify_metadata_rule =
+ mlx5_add_flow_rules(vport->ingress.acl,
+ NULL, &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) {
+ err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule);
+ esw_warn(esw->dev,
+ "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+ vport->vport, err);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+ vport->ingress.offloads.modify_metadata_rule = NULL;
+ }
+ return err;
+}
+
+static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->ingress.offloads.modify_metadata_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+ vport->ingress.offloads.modify_metadata_rule = NULL;
+}
+
+static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ int err = 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ flow_act.fg = vport->ingress.offloads.drop_grp;
+ flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ }
+
+ vport->ingress.offloads.drop_rule = flow_rule;
+out:
+ return err;
+}
+
+static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->ingress.offloads.drop_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.offloads.drop_rule);
+ vport->ingress.offloads.drop_rule = NULL;
+}
+
+static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int err;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = esw_acl_ingress_mod_metadata_create(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "vport(%d) create ingress modify metadata, err(%d)\n",
+ vport->vport, err);
+ return err;
+ }
+ }
+
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+ err = esw_acl_ingress_prio_tag_create(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "vport(%d) create ingress prio tag rule, err(%d)\n",
+ vport->vport, err);
+ goto prio_tag_err;
+ }
+ }
+
+ return 0;
+
+prio_tag_err:
+ esw_acl_ingress_mod_metadata_destroy(esw, vport);
+ return err;
+}
+
+static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ esw_acl_ingress_allow_rule_destroy(vport);
+ esw_acl_ingress_mod_metadata_destroy(esw, vport);
+ esw_acl_ingress_src_port_drop_destroy(esw, vport);
+}
+
+static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ u32 flow_index = 0;
+ int ret = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ if (vport->vport == MLX5_VPORT_UPLINK) {
+		/* This group can hold an FTE to drop all traffic.
+		 * Needed in case LAG is enabled.
+		 */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, ret);
+ goto drop_err;
+ }
+ vport->ingress.offloads.drop_grp = g;
+ flow_index++;
+ }
+
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+		/* This group holds an FTE that matches untagged packets when
+		 * prio_tag is enabled.
+		 */
+ memset(flow_group_in, 0, inlen);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in, match_criteria);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+ vport->vport, ret);
+ goto prio_tag_err;
+ }
+ vport->ingress.offloads.metadata_prio_tag_grp = g;
+ flow_index++;
+ }
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ /* This group holds an FTE with no match to add metadata for
+ * tagged packets if prio-tag is enabled, or for all untagged
+ * traffic in case prio-tag is disabled.
+ */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, ret);
+ goto metadata_err;
+ }
+ vport->ingress.offloads.metadata_allmatch_grp = g;
+ }
+
+ kvfree(flow_group_in);
+ return 0;
+
+metadata_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+ vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+ }
+prio_tag_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp);
+ vport->ingress.offloads.drop_grp = NULL;
+ }
+drop_err:
+ kvfree(flow_group_in);
+ return ret;
+}
+
+static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.offloads.metadata_allmatch_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp);
+ vport->ingress.offloads.metadata_allmatch_grp = NULL;
+ }
+
+ if (vport->ingress.offloads.metadata_prio_tag_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+ vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+ }
+
+ if (vport->ingress.offloads.drop_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp);
+ vport->ingress.offloads.drop_grp = NULL;
+ }
+}
+
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int num_ftes = 0;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !esw_acl_ingress_prio_tag_enabled(esw, vport))
+ return 0;
+
+ esw_acl_ingress_allow_rule_destroy(vport);
+
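+	/* Size the ACL with one FTE per rule that may be installed:
+	 * metadata set, uplink drop and prio-tag allow.
+	 */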
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ num_ftes++;
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ num_ftes++;
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport))
+ num_ftes++;
+
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ num_ftes);
+ if (IS_ERR(vport->ingress.acl)) {
+ err = PTR_ERR(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_ingress_ofld_groups_create(esw, vport);
+ if (err)
+ goto group_err;
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules\n", vport->vport);
+
+ err = esw_acl_ingress_ofld_rules_create(esw, vport);
+ if (err)
+ goto rules_err;
+
+ return 0;
+
+rules_err:
+ esw_acl_ingress_ofld_groups_destroy(vport);
+group_err:
+ esw_acl_ingress_table_destroy(vport);
+ return err;
+}
+
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ esw_acl_ingress_ofld_rules_destroy(esw, vport);
+ esw_acl_ingress_ofld_groups_destroy(vport);
+ esw_acl_ingress_table_destroy(vport);
+}
+
+/* Caller must hold rtnl_lock */
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 metadata)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+ int err;
+
+ if (WARN_ON_ONCE(IS_ERR(vport))) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ return PTR_ERR(vport);
+ }
+
+ esw_acl_ingress_ofld_rules_destroy(esw, vport);
+
+ vport->metadata = metadata ? metadata : vport->default_metadata;
+
+ /* Recreate ingress acl rules with vport->metadata */
+ err = esw_acl_ingress_ofld_rules_create(esw, vport);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ vport->metadata = vport->default_metadata;
+ return err;
+}
+
+int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (IS_ERR(vport)) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ return PTR_ERR(vport);
+ }
+
+ return esw_acl_ingress_src_port_drop_create(esw, vport);
+}
+
+void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport))) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ return;
+ }
+
+ esw_acl_ingress_src_port_drop_destroy(esw, vport);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
new file mode 100644
index 000000000..44c152da3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_LGCY_H__
+#define __MLX5_ESWITCH_ACL_LGCY_H__
+
+#include "eswitch.h"
+
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
new file mode 100644
index 000000000..11d3d3978
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_OFLD_H__
+#define __MLX5_ESWITCH_ACL_OFLD_H__
+
+#include "eswitch.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+ u16 passive_vport_num);
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num);
+
+static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *esw)
+{
+ return esw && esw->mode == MLX5_ESWITCH_OFFLOADS &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_ESW_FLOWTABLE(esw->dev, egress_acl_forward_to_vport);
+}
+
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 metadata);
+void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num);
+
+#else /* CONFIG_MLX5_ESWITCH */
+static inline void
+mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{}
+
+static inline int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return 0;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
new file mode 100644
index 000000000..d0b2676c3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -0,0 +1,1853 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/build_bug.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include <net/switchdev.h>
+#include "lib/devcom.h"
+#include "bridge.h"
+#include "eswitch.h"
+#include "bridge_priv.h"
+#define CREATE_TRACE_POINTS
+#include "diag/bridge_tracepoint.h"
+
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000);
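+/* Ingress table layout: four 12000-entry VLAN/QinQ groups (802.1Q,
+ * 802.1Q filter, 802.1ad, 802.1ad filter) followed by a 16000-entry MAC
+ * group, 64000 entries in total.
+ */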
+
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \
+ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1)
+static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000);
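+/* Egress table layout: a 16000-entry 802.1Q group, a 16000-entry 802.1ad
+ * group, a 31999-entry MAC group and a single-entry miss group, 64000
+ * entries in total.
+ */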
+
+#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
+
+enum {
+ MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+ MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
+};
+
+static const struct rhashtable_params fdb_ht_params = {
+ .key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key),
+ .key_len = sizeof(struct mlx5_esw_bridge_fdb_key),
+ .head_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, ht_node),
+ .automatic_shrinking = true,
+};
+
+enum {
+ MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
+};
+
+struct mlx5_esw_bridge {
+ int ifindex;
+ int refcnt;
+ struct list_head list;
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
+ struct list_head fdb_list;
+ struct rhashtable fdb_ht;
+
+ struct mlx5_flow_table *egress_ft;
+ struct mlx5_flow_group *egress_vlan_fg;
+ struct mlx5_flow_group *egress_qinq_fg;
+ struct mlx5_flow_group *egress_mac_fg;
+ struct mlx5_flow_group *egress_miss_fg;
+ struct mlx5_pkt_reformat *egress_miss_pkt_reformat;
+ struct mlx5_flow_handle *egress_miss_handle;
+ unsigned long ageing_time;
+ u32 flags;
+ u16 vlan_proto;
+};
+
+static void
+mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid,
+ unsigned long val)
+{
+ struct switchdev_notifier_fdb_info send_info = {};
+
+ send_info.addr = addr;
+ send_info.vid = vid;
+ send_info.offloaded = true;
+ call_switchdev_notifiers(val, dev, &send_info.info, NULL);
+}
+
+static void
+mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry)
+{
+ if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER)))
+ mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+ entry->key.vid,
+ SWITCHDEV_FDB_DEL_TO_BRIDGE);
+}
+
+static bool mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(struct mlx5_eswitch *esw)
+{
+ return BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) &&
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) >= sizeof(struct vlan_hdr) &&
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) >=
+ offsetof(struct vlan_ethhdr, h_vlan_proto);
+}
+
+static struct mlx5_pkt_reformat *
+mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw)
+{
+ struct mlx5_pkt_reformat_params reformat_params = {};
+
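+	/* Remove the 4-byte VLAN header that sits right after the source MAC
+	 * address, anchored at the start of the MAC header.
+	 */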
+ reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR;
+ reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START;
+ reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto);
+ reformat_params.size = sizeof(struct vlan_hdr);
+ return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB);
+}
+
+static struct mlx5_flow_table *
+mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *fdb;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!ns) {
+ esw_warn(dev, "Failed to get FDB namespace\n");
+ return ERR_PTR(-ENOENT);
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft_attr.max_fte = max_fte;
+ ft_attr.level = level;
+ ft_attr.prio = FDB_BR_OFFLOAD;
+ fdb = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(fdb))
+ esw_warn(dev, "Failed to create bridge FDB Table (err=%ld)\n", PTR_ERR(fdb));
+
+ return fdb;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ kvfree(in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%ld)\n",
+ vlan_proto, PTR_ERR(fg));
+
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(unsigned int from, unsigned int to,
+ u16 vlan_proto, struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021Q, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_qinq_filter_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ingress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021AD, esw,
+ ingress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(ingress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create MAC flow group for bridge ingress table (err=%ld)\n",
+ PTR_ERR(fg));
+
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *egress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0);
+ if (vlan_proto == ETH_P_8021Q)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+ else if (vlan_proto == ETH_P_8021AD)
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, from);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, to);
+
+ fg = mlx5_create_flow_group(egress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create VLAN flow group for bridge egress table (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, egress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_qinq_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *egress_ft)
+{
+ unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM;
+ unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO;
+
+ return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, egress_ft);
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(egress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create bridge egress table MAC flow group (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM);
+ MLX5_SET(create_flow_group_in, in, end_flow_index,
+ MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO);
+
+ fg = mlx5_create_flow_group(egress_ft, in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev,
+ "Failed to create bridge egress table miss flow group (err=%ld)\n",
+ PTR_ERR(fg));
+ kvfree(in);
+ return fg;
+}
+
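+/* Initialize the per-eswitch ingress state shared by all offloaded bridges:
+ * the ingress and "skip" flow tables plus the VLAN, VLAN filter, QinQ, QinQ
+ * filter and MAC flow groups. Requires vport match metadata (reg_c_0).
+ */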
+static int
+mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_flow_group *mac_fg, *qinq_filter_fg, *qinq_fg, *vlan_filter_fg, *vlan_fg;
+ struct mlx5_flow_table *ingress_ft, *skip_ft;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return -EOPNOTSUPP;
+
+ ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+ esw);
+ if (IS_ERR(ingress_ft))
+ return PTR_ERR(ingress_ft);
+
+ skip_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
+ esw);
+ if (IS_ERR(skip_ft)) {
+ err = PTR_ERR(skip_ft);
+ goto err_skip_tbl;
+ }
+
+ vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(esw, ingress_ft);
+ if (IS_ERR(vlan_fg)) {
+ err = PTR_ERR(vlan_fg);
+ goto err_vlan_fg;
+ }
+
+ vlan_filter_fg = mlx5_esw_bridge_ingress_vlan_filter_fg_create(esw, ingress_ft);
+ if (IS_ERR(vlan_filter_fg)) {
+ err = PTR_ERR(vlan_filter_fg);
+ goto err_vlan_filter_fg;
+ }
+
+ qinq_fg = mlx5_esw_bridge_ingress_qinq_fg_create(esw, ingress_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
+ qinq_filter_fg = mlx5_esw_bridge_ingress_qinq_filter_fg_create(esw, ingress_ft);
+ if (IS_ERR(qinq_filter_fg)) {
+ err = PTR_ERR(qinq_filter_fg);
+ goto err_qinq_filter_fg;
+ }
+
+ mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(esw, ingress_ft);
+ if (IS_ERR(mac_fg)) {
+ err = PTR_ERR(mac_fg);
+ goto err_mac_fg;
+ }
+
+ br_offloads->ingress_ft = ingress_ft;
+ br_offloads->skip_ft = skip_ft;
+ br_offloads->ingress_vlan_fg = vlan_fg;
+ br_offloads->ingress_vlan_filter_fg = vlan_filter_fg;
+ br_offloads->ingress_qinq_fg = qinq_fg;
+ br_offloads->ingress_qinq_filter_fg = qinq_filter_fg;
+ br_offloads->ingress_mac_fg = mac_fg;
+ return 0;
+
+err_mac_fg:
+ mlx5_destroy_flow_group(qinq_filter_fg);
+err_qinq_filter_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
+ mlx5_destroy_flow_group(vlan_filter_fg);
+err_vlan_filter_fg:
+ mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
+ mlx5_destroy_flow_table(skip_ft);
+err_skip_tbl:
+ mlx5_destroy_flow_table(ingress_ft);
+ return err;
+}
+
+static void
+mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ mlx5_destroy_flow_group(br_offloads->ingress_mac_fg);
+ br_offloads->ingress_mac_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_qinq_filter_fg);
+ br_offloads->ingress_qinq_filter_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_qinq_fg);
+ br_offloads->ingress_qinq_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_vlan_filter_fg);
+ br_offloads->ingress_vlan_filter_fg = NULL;
+ mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg);
+ br_offloads->ingress_vlan_fg = NULL;
+ mlx5_destroy_flow_table(br_offloads->skip_ft);
+ br_offloads->skip_ft = NULL;
+ mlx5_destroy_flow_table(br_offloads->ingress_ft);
+ br_offloads->ingress_ft = NULL;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft,
+ struct mlx5_flow_table *skip_ft,
+ struct mlx5_pkt_reformat *pkt_reformat);
+
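+/* Create the per-bridge egress table with its VLAN, QinQ and MAC flow groups.
+ * When VLAN pop packet reformat is supported, also install a miss group and
+ * rule that strips the VLAN pushed at ingress and forwards the packet to the
+ * skip table; failure to set up the miss path is tolerated and only logged.
+ */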
+static int
+mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg, *qinq_fg;
+ struct mlx5_pkt_reformat *miss_pkt_reformat = NULL;
+ struct mlx5_flow_handle *miss_handle = NULL;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_flow_table *egress_ft;
+ int err;
+
+ egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE,
+ MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+ esw);
+ if (IS_ERR(egress_ft))
+ return PTR_ERR(egress_ft);
+
+ vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(esw, egress_ft);
+ if (IS_ERR(vlan_fg)) {
+ err = PTR_ERR(vlan_fg);
+ goto err_vlan_fg;
+ }
+
+ qinq_fg = mlx5_esw_bridge_egress_qinq_fg_create(esw, egress_ft);
+ if (IS_ERR(qinq_fg)) {
+ err = PTR_ERR(qinq_fg);
+ goto err_qinq_fg;
+ }
+
+ mac_fg = mlx5_esw_bridge_egress_mac_fg_create(esw, egress_ft);
+ if (IS_ERR(mac_fg)) {
+ err = PTR_ERR(mac_fg);
+ goto err_mac_fg;
+ }
+
+ if (mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) {
+ miss_fg = mlx5_esw_bridge_egress_miss_fg_create(esw, egress_ft);
+ if (IS_ERR(miss_fg)) {
+ esw_warn(esw->dev, "Failed to create miss flow group (err=%ld)\n",
+ PTR_ERR(miss_fg));
+ miss_fg = NULL;
+ goto skip_miss_flow;
+ }
+
+ miss_pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw);
+ if (IS_ERR(miss_pkt_reformat)) {
+ esw_warn(esw->dev,
+ "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n",
+ PTR_ERR(miss_pkt_reformat));
+ miss_pkt_reformat = NULL;
+ mlx5_destroy_flow_group(miss_fg);
+ miss_fg = NULL;
+ goto skip_miss_flow;
+ }
+
+ miss_handle = mlx5_esw_bridge_egress_miss_flow_create(egress_ft,
+ br_offloads->skip_ft,
+ miss_pkt_reformat);
+ if (IS_ERR(miss_handle)) {
+ esw_warn(esw->dev, "Failed to create miss flow (err=%ld)\n",
+ PTR_ERR(miss_handle));
+ miss_handle = NULL;
+ mlx5_packet_reformat_dealloc(esw->dev, miss_pkt_reformat);
+ miss_pkt_reformat = NULL;
+ mlx5_destroy_flow_group(miss_fg);
+ miss_fg = NULL;
+ goto skip_miss_flow;
+ }
+ }
+skip_miss_flow:
+
+ bridge->egress_ft = egress_ft;
+ bridge->egress_vlan_fg = vlan_fg;
+ bridge->egress_qinq_fg = qinq_fg;
+ bridge->egress_mac_fg = mac_fg;
+ bridge->egress_miss_fg = miss_fg;
+ bridge->egress_miss_pkt_reformat = miss_pkt_reformat;
+ bridge->egress_miss_handle = miss_handle;
+ return 0;
+
+err_mac_fg:
+ mlx5_destroy_flow_group(qinq_fg);
+err_qinq_fg:
+ mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
+ mlx5_destroy_flow_table(egress_ft);
+ return err;
+}
+
+static void
+mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
+{
+ if (bridge->egress_miss_handle)
+ mlx5_del_flow_rules(bridge->egress_miss_handle);
+ if (bridge->egress_miss_pkt_reformat)
+ mlx5_packet_reformat_dealloc(bridge->br_offloads->esw->dev,
+ bridge->egress_miss_pkt_reformat);
+ if (bridge->egress_miss_fg)
+ mlx5_destroy_flow_group(bridge->egress_miss_fg);
+ mlx5_destroy_flow_group(bridge->egress_mac_fg);
+ mlx5_destroy_flow_group(bridge->egress_qinq_fg);
+ mlx5_destroy_flow_group(bridge->egress_vlan_fg);
+ mlx5_destroy_flow_table(bridge->egress_ft);
+}
+
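+/* Create an ingress FDB rule: match the source MAC and the source vport
+ * metadata in reg_c_0 (plus VLAN tag/VID when the entry has a VLAN without a
+ * push action), count the packet and forward it to the bridge egress table.
+ * For a PVID VLAN the rule additionally pushes the VLAN header and marks
+ * reg_c_1 so the egress miss rule can pop it again.
+ */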
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+ struct mlx5_esw_bridge *bridge,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_destination dests[2] = {};
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *smac_v, *smac_c;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2;
+
+ smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, addr);
+ smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.smac_47_16);
+ eth_broadcast_addr(smac_c);
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+
+ if (vlan && vlan->pkt_reformat_push) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.pkt_reformat = vlan->pkt_reformat_push;
+ flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark;
+ } else if (vlan) {
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+ vlan->vid);
+ }
+
+ dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dests[0].ft = bridge->egress_ft;
+ dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dests[1].counter_id = counter_id;
+
+ handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, dests,
+ ARRAY_SIZE(dests));
+
+ kvfree(rule_spec);
+ return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+ struct mlx5_esw_bridge *bridge)
+{
+ return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+ bridge, bridge->br_offloads->esw);
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_eswitch *peer_esw;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return ERR_PTR(-ENODEV);
+
+ handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+ bridge, peer_esw);
+
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return handle;
+}
+
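+/* Per-entry filter rule used when VLAN filtering is enabled: match the source
+ * MAC, the vport metadata and the presence of any VLAN tag, and forward to
+ * the skip table so that tagged traffic which did not match a per-VID ingress
+ * rule is not bridged by the VLAN-agnostic MAC rules.
+ */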
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = br_offloads->skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *smac_v, *smac_c;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2;
+
+ smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, addr);
+ smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.smac_47_16);
+ eth_broadcast_addr(smac_c);
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
+
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+
+ handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
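+/* Create an egress FDB rule: match the destination MAC (and VLAN tag/VID when
+ * the entry has a VLAN), optionally pop the VLAN header, and forward to the
+ * destination vport; on merged-eswitch setups the owning VHCA id is set as
+ * well so the rule can target the peer eswitch's vport.
+ */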
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr,
+ struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+ .vport.num = vport_num,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ .flags = FLOW_ACT_NO_APPEND,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ u8 *dmac_v, *dmac_c;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
+ vport_num == MLX5_VPORT_UPLINK)
+ rule_spec->flow_context.flow_source =
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.dmac_47_16);
+ ether_addr_copy(dmac_v, addr);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.dmac_47_16);
+ eth_broadcast_addr(dmac_c);
+
+ if (vlan) {
+ if (vlan->pkt_reformat_pop) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act.pkt_reformat = vlan->pkt_reformat_pop;
+ }
+
+ if (bridge->vlan_proto == ETH_P_8021Q) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.cvlan_tag);
+ } else if (bridge->vlan_proto == ETH_P_8021AD) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+ outer_headers.svlan_tag);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+ vlan->vid);
+ }
+
+ if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ dest.vport.vhca_id = esw_owner_vhca_id;
+ }
+ handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
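+/* Egress miss rule: packets that carry the "VLAN pushed at ingress" mark in
+ * reg_c_1 but did not match any egress FDB rule get the pushed VLAN header
+ * removed and are forwarded to the skip table.
+ */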
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft,
+ struct mlx5_flow_table *skip_ft,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ .ft = skip_ft,
+ };
+ struct mlx5_flow_act flow_act = {
+ .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT,
+ .flags = FLOW_ACT_NO_APPEND,
+ .pkt_reformat = pkt_reformat,
+ };
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return ERR_PTR(-ENOMEM);
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_1,
+ ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK);
+
+ handle = mlx5_add_flow_rules(egress_ft, rule_spec, &flow_act, &dest, 1);
+
+ kvfree(rule_spec);
+ return handle;
+}
+
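+/* Allocate a bridge instance for the given ifindex, create its egress table
+ * and FDB hashtable, and link it into the offloads bridge list with an
+ * initial reference.
+ */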
+static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge *bridge;
+ int err;
+
+ bridge = kvzalloc(sizeof(*bridge), GFP_KERNEL);
+ if (!bridge)
+ return ERR_PTR(-ENOMEM);
+
+ bridge->br_offloads = br_offloads;
+ err = mlx5_esw_bridge_egress_table_init(br_offloads, bridge);
+ if (err)
+ goto err_egress_tbl;
+
+ err = rhashtable_init(&bridge->fdb_ht, &fdb_ht_params);
+ if (err)
+ goto err_fdb_ht;
+
+ INIT_LIST_HEAD(&bridge->fdb_list);
+ bridge->ifindex = ifindex;
+ bridge->refcnt = 1;
+ bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME);
+ bridge->vlan_proto = ETH_P_8021Q;
+ list_add(&bridge->list, &br_offloads->bridges);
+
+ return bridge;
+
+err_fdb_ht:
+ mlx5_esw_bridge_egress_table_cleanup(bridge);
+err_egress_tbl:
+ kvfree(bridge);
+ return ERR_PTR(err);
+}
+
+static void mlx5_esw_bridge_get(struct mlx5_esw_bridge *bridge)
+{
+ bridge->refcnt++;
+}
+
+static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge *bridge)
+{
+ if (--bridge->refcnt)
+ return;
+
+ mlx5_esw_bridge_egress_table_cleanup(bridge);
+ list_del(&bridge->list);
+ rhashtable_destroy(&bridge->fdb_ht);
+ kvfree(bridge);
+
+ if (list_empty(&br_offloads->bridges))
+ mlx5_esw_bridge_ingress_table_cleanup(br_offloads);
+}
+
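+/* Look up a bridge by ifindex and take a reference, or create a new one. The
+ * shared ingress table is lazily initialized for the first bridge and torn
+ * down again if creating that bridge fails.
+ */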
+static struct mlx5_esw_bridge *
+mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge *bridge;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(bridge, &br_offloads->bridges, list) {
+ if (bridge->ifindex == ifindex) {
+ mlx5_esw_bridge_get(bridge);
+ return bridge;
+ }
+ }
+
+ if (!br_offloads->ingress_ft) {
+ int err = mlx5_esw_bridge_ingress_table_init(br_offloads);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ bridge = mlx5_esw_bridge_create(ifindex, br_offloads);
+ if (IS_ERR(bridge) && list_empty(&br_offloads->bridges))
+ mlx5_esw_bridge_ingress_table_cleanup(br_offloads);
+ return bridge;
+}
+
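+/* Bridge ports are keyed in the xarray by the vport number in the low 16 bits
+ * and the owning eswitch VHCA id in the next 16 bits, so ports of different
+ * eswitches (e.g. peer ports on merged eswitch) can coexist in the same map.
+ */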
+static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id)
+{
+ return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE;
+}
+
+static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
+{
+ return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id);
+}
+
+static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL);
+}
+
+static struct mlx5_esw_bridge_port *
+mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num,
+ esw_owner_vhca_id));
+}
+
+static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port));
+}
+
+static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry)
+{
+ trace_mlx5_esw_bridge_fdb_entry_refresh(entry);
+
+ mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+ entry->key.vid,
+ SWITCHDEV_FDB_ADD_TO_BRIDGE);
+}
+
+static void
+mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
+ struct mlx5_esw_bridge *bridge)
+{
+ trace_mlx5_esw_bridge_fdb_entry_cleanup(entry);
+
+ rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
+ mlx5_del_flow_rules(entry->egress_handle);
+ if (entry->filter_handle)
+ mlx5_del_flow_rules(entry->filter_handle);
+ mlx5_del_flow_rules(entry->ingress_handle);
+ mlx5_fc_destroy(bridge->br_offloads->esw->dev, entry->ingress_counter);
+ list_del(&entry->vlan_list);
+ list_del(&entry->list);
+ kvfree(entry);
+}
+
+static void
+mlx5_esw_bridge_fdb_entry_notify_and_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
+ struct mlx5_esw_bridge *bridge)
+{
+ mlx5_esw_bridge_fdb_del_notify(entry);
+ mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+}
+
+static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port)
+{
+ return xa_load(&port->vlans, vid);
+}
+
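+/* Allocate an INSERT_HEADER packet reformat that pushes the VLAN header right
+ * after the source MAC; used on ingress for PVID VLANs. Requires the
+ * reformat_insert capability with sufficient insert size and offset limits.
+ */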
+static int
+mlx5_esw_bridge_vlan_push_create(u16 vlan_proto, struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_eswitch *esw)
+{
+ struct {
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ } vlan_hdr = { htons(vlan_proto), htons(vlan->vid) };
+ struct mlx5_pkt_reformat_params reformat_params = {};
+ struct mlx5_pkt_reformat *pkt_reformat;
+
+ if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_insert)) ||
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_size) < sizeof(vlan_hdr) ||
+ MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_offset) <
+ offsetof(struct vlan_ethhdr, h_vlan_proto)) {
+ esw_warn(esw->dev, "Packet reformat INSERT_HEADER is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ reformat_params.type = MLX5_REFORMAT_TYPE_INSERT_HDR;
+ reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START;
+ reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto);
+ reformat_params.size = sizeof(vlan_hdr);
+ reformat_params.data = &vlan_hdr;
+ pkt_reformat = mlx5_packet_reformat_alloc(esw->dev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(pkt_reformat)) {
+ esw_warn(esw->dev, "Failed to alloc packet reformat INSERT_HEADER (err=%ld)\n",
+ PTR_ERR(pkt_reformat));
+ return PTR_ERR(pkt_reformat);
+ }
+
+ vlan->pkt_reformat_push = pkt_reformat;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_push_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_push);
+ vlan->pkt_reformat_push = NULL;
+}
+
+static int
+mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ struct mlx5_pkt_reformat *pkt_reformat;
+
+ if (!mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) {
+ esw_warn(esw->dev, "Packet reformat REMOVE_HEADER is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw);
+ if (IS_ERR(pkt_reformat)) {
+ esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n",
+ PTR_ERR(pkt_reformat));
+ return PTR_ERR(pkt_reformat);
+ }
+
+ vlan->pkt_reformat_pop = pkt_reformat;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_pop);
+ vlan->pkt_reformat_pop = NULL;
+}
+
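+/* Build the modify-header action that writes the "bridge pushed a VLAN" mark
+ * into reg_c_1; the egress miss rule keys on this mark to pop the VLAN again
+ * for packets that miss the egress FDB.
+ */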
+static int
+mlx5_esw_bridge_vlan_push_mark_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_modify_hdr *pkt_mod_hdr;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
+ MLX5_SET(set_action_in, action, offset, 8);
+ MLX5_SET(set_action_in, action, length, ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS);
+ MLX5_SET(set_action_in, action, data, ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN);
+
+ pkt_mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, 1, action);
+ if (IS_ERR(pkt_mod_hdr))
+ return PTR_ERR(pkt_mod_hdr);
+
+ vlan->pkt_mod_hdr_push_mark = pkt_mod_hdr;
+ return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+ mlx5_modify_header_dealloc(esw->dev, vlan->pkt_mod_hdr_push_mark);
+ vlan->pkt_mod_hdr_push_mark = NULL;
+}
+
+static int
+mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_eswitch *esw)
+{
+ int err;
+
+ if (flags & BRIDGE_VLAN_INFO_PVID) {
+ err = mlx5_esw_bridge_vlan_push_create(vlan_proto, vlan, esw);
+ if (err)
+ return err;
+
+ err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw);
+ if (err)
+ goto err_vlan_push_mark;
+ }
+
+ if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
+ err = mlx5_esw_bridge_vlan_pop_create(vlan, esw);
+ if (err)
+ goto err_vlan_pop;
+ }
+
+ return 0;
+
+err_vlan_pop:
+ if (vlan->pkt_mod_hdr_push_mark)
+ mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+err_vlan_push_mark:
+ if (vlan->pkt_reformat_push)
+ mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+ return err;
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_bridge_port *port,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ int err;
+
+ vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return ERR_PTR(-ENOMEM);
+
+ vlan->vid = vid;
+ vlan->flags = flags;
+ INIT_LIST_HEAD(&vlan->fdb_list);
+
+ err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw);
+ if (err)
+ goto err_vlan_push_pop;
+
+ err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL);
+ if (err)
+ goto err_xa_insert;
+
+ trace_mlx5_esw_bridge_vlan_create(vlan);
+ return vlan;
+
+err_xa_insert:
+ if (vlan->pkt_reformat_pop)
+ mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
+ if (vlan->pkt_mod_hdr_push_mark)
+ mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+ if (vlan->pkt_reformat_push)
+ mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+err_vlan_push_pop:
+ kvfree(vlan);
+ return ERR_PTR(err);
+}
+
+static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan)
+{
+ xa_erase(&port->vlans, vlan->vid);
+}
+
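+/* Remove all FDB entries learned on this VLAN and release its push/pop packet
+ * reformat and push-mark modify-header objects, if any were created.
+ */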
+static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_eswitch *esw = bridge->br_offloads->esw;
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list)
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+
+ if (vlan->pkt_reformat_pop)
+ mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
+ if (vlan->pkt_mod_hdr_push_mark)
+ mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+ if (vlan->pkt_reformat_push)
+ mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+}
+
+static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge_vlan *vlan,
+ struct mlx5_esw_bridge *bridge)
+{
+ trace_mlx5_esw_bridge_vlan_cleanup(vlan);
+ mlx5_esw_bridge_vlan_flush(vlan, bridge);
+ mlx5_esw_bridge_vlan_erase(port, vlan);
+ kvfree(vlan);
+}
+
+static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_vlan *vlan;
+ unsigned long index;
+
+ xa_for_each(&port->vlans, index, vlan)
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge);
+}
+
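+/* Re-create the VLAN push/pop actions of every VLAN on the port, flushing FDB
+ * entries that referenced the old actions first. Used when the bridge VLAN
+ * protocol is switched between 802.1Q and 802.1ad.
+ */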
+static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_vlan *vlan;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&port->vlans, i, vlan) {
+ mlx5_esw_bridge_vlan_flush(vlan, bridge);
+ err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan,
+ br_offloads->esw);
+ if (err) {
+ esw_warn(br_offloads->esw->dev,
+ "Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n",
+ vlan->vid, bridge->vlan_proto, port->vport_num,
+ err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_esw_bridge_vlans_recreate(struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+ struct mlx5_esw_bridge_port *port;
+ unsigned long i;
+ int err;
+
+ xa_for_each(&br_offloads->ports, i, port) {
+ if (port->bridge != bridge)
+ continue;
+
+ err = mlx5_esw_bridge_port_vlans_recreate(port, bridge);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge_vlan *vlan;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads);
+ if (!port) {
+ /* FDB is added asynchronously on wq while port might have been deleted
+ * concurrently. Report on 'info' logging level and skip the FDB offload.
+ */
+ esw_info(esw->dev, "Failed to lookup bridge port (vport=%u)\n", vport_num);
+ return ERR_PTR(-EINVAL);
+ }
+
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (!vlan) {
+ /* FDB is added asynchronously on wq while vlan might have been deleted
+ * concurrently. Report on 'info' logging level and skip the FDB offload.
+ */
+ esw_info(esw->dev, "Failed to lookup bridge port vlan metadata (vport=%u)\n",
+ vport_num);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return vlan;
+}
+
+static struct mlx5_esw_bridge_fdb_entry *
+mlx5_esw_bridge_fdb_lookup(struct mlx5_esw_bridge *bridge,
+ const unsigned char *addr, u16 vid)
+{
+ struct mlx5_esw_bridge_fdb_key key = {};
+
+ ether_addr_copy(key.addr, addr);
+ key.vid = vid;
+ return rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params);
+}
+
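+/* Create an FDB entry and its hardware state: a flow counter, the ingress
+ * rule (or its peer-eswitch variant), an optional ingress VLAN filter rule
+ * and the egress rule, then insert the entry into the bridge hashtable and
+ * lists. An existing entry with the same MAC/VID is replaced.
+ */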
+static struct mlx5_esw_bridge_fdb_entry *
+mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ const unsigned char *addr, u16 vid, bool added_by_user, bool peer,
+ struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_esw_bridge_vlan *vlan = NULL;
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_fc *counter;
+ int err;
+
+ if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) {
+ vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge,
+ esw);
+ if (IS_ERR(vlan))
+ return ERR_CAST(vlan);
+ }
+
+ entry = mlx5_esw_bridge_fdb_lookup(bridge, addr, vid);
+ if (entry)
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+
+ entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ ether_addr_copy(entry->key.addr, addr);
+ entry->key.vid = vid;
+ entry->dev = dev;
+ entry->vport_num = vport_num;
+ entry->esw_owner_vhca_id = esw_owner_vhca_id;
+ entry->lastuse = jiffies;
+ if (added_by_user)
+ entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER;
+ if (peer)
+ entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER;
+
+ counter = mlx5_fc_create(esw->dev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_ingress_fc_create;
+ }
+ entry->ingress_counter = counter;
+
+ handle = peer ?
+ mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan,
+ mlx5_fc_id(counter), bridge) :
+ mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan,
+ mlx5_fc_id(counter), bridge);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n",
+ vport_num, err);
+ goto err_ingress_flow_create;
+ }
+ entry->ingress_handle = handle;
+
+ if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG) {
+ handle = mlx5_esw_bridge_ingress_filter_flow_create(vport_num, addr, bridge);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ esw_warn(esw->dev, "Failed to create ingress filter(vport=%u,err=%d)\n",
+ vport_num, err);
+ goto err_ingress_filter_flow_create;
+ }
+ entry->filter_handle = handle;
+ }
+
+ handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan,
+ bridge);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n",
+ vport_num, err);
+ goto err_egress_flow_create;
+ }
+ entry->egress_handle = handle;
+
+ err = rhashtable_insert_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
+ if (err) {
+ esw_warn(esw->dev, "Failed to insert FDB flow(vport=%u,err=%d)\n", vport_num, err);
+ goto err_ht_init;
+ }
+
+ if (vlan)
+ list_add(&entry->vlan_list, &vlan->fdb_list);
+ else
+ INIT_LIST_HEAD(&entry->vlan_list);
+ list_add(&entry->list, &bridge->fdb_list);
+
+ trace_mlx5_esw_bridge_fdb_entry_init(entry);
+ return entry;
+
+err_ht_init:
+ mlx5_del_flow_rules(entry->egress_handle);
+err_egress_flow_create:
+ if (entry->filter_handle)
+ mlx5_del_flow_rules(entry->filter_handle);
+err_ingress_filter_flow_create:
+ mlx5_del_flow_rules(entry->ingress_handle);
+err_ingress_flow_create:
+ mlx5_fc_destroy(esw->dev, entry->ingress_counter);
+err_ingress_fc_create:
+ kvfree(entry);
+ return ERR_PTR(err);
+}
+
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ port->bridge->ageing_time = clock_t_to_jiffies(ageing_time);
+ return 0;
+}
+
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+ bool filtering;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ bridge = port->bridge;
+ filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+ if (filtering == enable)
+ return 0;
+
+ mlx5_esw_bridge_fdb_flush(bridge);
+ if (enable)
+ bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+ else
+ bridge->flags &= ~MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+
+ return 0;
+}
+
+int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id,
+ br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ bridge = port->bridge;
+ if (bridge->vlan_proto == proto)
+ return 0;
+ if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) {
+ esw_warn(br_offloads->esw->dev, "Can't set unsupported VLAN protocol %x\n", proto);
+ return -EOPNOTSUPP;
+ }
+
+ mlx5_esw_bridge_fdb_flush(bridge);
+ bridge->vlan_proto = proto;
+ mlx5_esw_bridge_vlans_recreate(bridge);
+
+ return 0;
+}
+
+static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge *bridge)
+{
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_esw_bridge_port *port;
+ int err;
+
+ port = kvzalloc(sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ port->vport_num = vport_num;
+ port->esw_owner_vhca_id = esw_owner_vhca_id;
+ port->bridge = bridge;
+ port->flags |= flags;
+ xa_init(&port->vlans);
+ err = mlx5_esw_bridge_port_insert(port, br_offloads);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
+ port->vport_num, port->esw_owner_vhca_id, err);
+ goto err_port_insert;
+ }
+ trace_mlx5_esw_bridge_vport_init(port);
+
+ return 0;
+
+err_port_insert:
+ kvfree(port);
+ return err;
+}
+
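+/* Tear down a bridge port: flush its FDB entries and VLANs, remove it from
+ * the ports xarray and drop the port's reference on its bridge.
+ */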
+static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads,
+ struct mlx5_esw_bridge_port *port)
+{
+ u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id;
+ struct mlx5_esw_bridge *bridge = port->bridge;
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
+ if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id)
+ mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+
+ trace_mlx5_esw_bridge_vport_cleanup(port);
+ mlx5_esw_bridge_port_vlans_flush(port, bridge);
+ mlx5_esw_bridge_port_erase(port, br_offloads);
+ kvfree(port);
+ mlx5_esw_bridge_put(br_offloads, bridge);
+ return 0;
+}
+
+static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge *bridge;
+ int err;
+
+ bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads);
+ if (IS_ERR(bridge)) {
+ NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex");
+ return PTR_ERR(bridge);
+ }
+
+ err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error initializing port");
+ goto err_vport;
+ }
+ return 0;
+
+err_vport:
+ mlx5_esw_bridge_put(br_offloads, bridge);
+ return err;
+}
+
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0,
+ br_offloads, extack);
+}
+
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge_port *port;
+ int err;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge");
+ return -EINVAL;
+ }
+ if (port->bridge->ifindex != ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge");
+ return -EINVAL;
+ }
+
+ err = mlx5_esw_bridge_vport_cleanup(br_offloads, port);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed");
+ return err;
+}
+
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch))
+ return 0;
+
+ return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id,
+ MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+ br_offloads, extack);
+}
+
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads,
+ extack);
+}
+
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge_vlan *vlan;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return -EINVAL;
+
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (vlan) {
+ if (vlan->flags == flags)
+ return 0;
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
+ }
+
+ vlan = mlx5_esw_bridge_vlan_create(port->bridge->vlan_proto, vid, flags, port,
+ br_offloads->esw);
+ if (IS_ERR(vlan)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry");
+ return PTR_ERR(vlan);
+ }
+ return 0;
+}
+
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge_vlan *vlan;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+ if (!vlan)
+ return;
+ mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
+}
+
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ bridge = port->bridge;
+ entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
+ if (!entry) {
+ esw_debug(br_offloads->esw->dev,
+ "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+ fdb_info->addr, fdb_info->vid, vport_num);
+ return;
+ }
+
+ entry->lastuse = jiffies;
+}
+
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ bridge = port->bridge;
+ entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr,
+ fdb_info->vid, fdb_info->added_by_user,
+ port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+ br_offloads->esw, bridge);
+ if (IS_ERR(entry))
+ return;
+
+ if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+ mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+ SWITCHDEV_FDB_OFFLOADED);
+ else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER))
+ /* Take over dynamic entries to prevent kernel bridge from aging them out. */
+ mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+ SWITCHDEV_FDB_ADD_TO_BRIDGE);
+}
+
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info)
+{
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct mlx5_esw_bridge_fdb_entry *entry;
+ struct mlx5_esw_bridge_port *port;
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+ if (!port)
+ return;
+
+ bridge = port->bridge;
+ entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
+ if (!entry) {
+ esw_warn(esw->dev,
+ "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+ fdb_info->addr, fdb_info->vid, vport_num);
+ return;
+ }
+
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+}
+
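+/* Periodic ageing pass: walk all FDB entries, use the hardware counter's
+ * last-use timestamp to refresh entries that are still passing traffic, and
+ * expire idle dynamic (non-peer) entries once the bridge ageing time has
+ * passed. User-added entries are never aged out here.
+ */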
+void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+ struct mlx5_esw_bridge *bridge;
+
+ list_for_each_entry(bridge, &br_offloads->bridges, list) {
+ list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
+ unsigned long lastuse =
+ (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter);
+
+ if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+ continue;
+
+ if (time_after(lastuse, entry->lastuse))
+ mlx5_esw_bridge_fdb_entry_refresh(entry);
+ else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) &&
+ time_is_before_jiffies(entry->lastuse + bridge->ageing_time))
+ mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge);
+ }
+ }
+}
+
+static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_esw_bridge_port *port;
+ unsigned long i;
+
+ xa_for_each(&br_offloads->ports, i, port)
+ mlx5_esw_bridge_vport_cleanup(br_offloads, port);
+
+ WARN_ONCE(!list_empty(&br_offloads->bridges),
+ "Cleaning up bridge offloads while still having bridges attached\n");
+}
+
+struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
+ ASSERT_RTNL();
+
+ br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
+ if (!br_offloads)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&br_offloads->bridges);
+ xa_init(&br_offloads->ports);
+ br_offloads->esw = esw;
+ esw->br_offloads = br_offloads;
+
+ return br_offloads;
+}
+
+void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+
+ ASSERT_RTNL();
+
+ if (!br_offloads)
+ return;
+
+ mlx5_esw_bridge_flush(br_offloads);
+ WARN_ON(!xa_empty(&br_offloads->ports));
+
+ esw->br_offloads = NULL;
+ kvfree(br_offloads);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
new file mode 100644
index 000000000..10851a515
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_ESW_BRIDGE_H__
+#define __MLX5_ESW_BRIDGE_H__
+
+#include <linux/notifier.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+#include "eswitch.h"
+
+struct mlx5_flow_table;
+struct mlx5_flow_group;
+
+struct mlx5_esw_bridge_offloads {
+ struct mlx5_eswitch *esw;
+ struct list_head bridges;
+ struct xarray ports;
+
+ struct notifier_block netdev_nb;
+ struct notifier_block nb_blk;
+ struct notifier_block nb;
+ struct workqueue_struct *wq;
+ struct delayed_work update_work;
+
+ struct mlx5_flow_table *ingress_ft;
+ struct mlx5_flow_group *ingress_vlan_fg;
+ struct mlx5_flow_group *ingress_vlan_filter_fg;
+ struct mlx5_flow_group *ingress_qinq_fg;
+ struct mlx5_flow_group *ingress_qinq_filter_fg;
+ struct mlx5_flow_group *ingress_mac_fg;
+
+ struct mlx5_flow_table *skip_ft;
+};
+
+struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw);
+void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw);
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+ struct mlx5_esw_bridge_offloads *br_offloads,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+ struct mlx5_esw_bridge_offloads *br_offloads);
+
+#endif /* __MLX5_ESW_BRIDGE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
new file mode 100644
index 000000000..878311fe9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef _MLX5_ESW_BRIDGE_PRIVATE_
+#define _MLX5_ESW_BRIDGE_PRIVATE_
+
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/rhashtable.h>
+#include <linux/xarray.h>
+#include "fs_core.h"
+
+struct mlx5_esw_bridge_fdb_key {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
+enum {
+ MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
+ MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
+};
+
+enum {
+ MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0),
+};
+
+struct mlx5_esw_bridge_fdb_entry {
+ struct mlx5_esw_bridge_fdb_key key;
+ struct rhash_head ht_node;
+ struct net_device *dev;
+ struct list_head list;
+ struct list_head vlan_list;
+ u16 vport_num;
+ u16 esw_owner_vhca_id;
+ u16 flags;
+
+ struct mlx5_flow_handle *ingress_handle;
+ struct mlx5_fc *ingress_counter;
+ unsigned long lastuse;
+ struct mlx5_flow_handle *egress_handle;
+ struct mlx5_flow_handle *filter_handle;
+};
+
+struct mlx5_esw_bridge_vlan {
+ u16 vid;
+ u16 flags;
+ struct list_head fdb_list;
+ struct mlx5_pkt_reformat *pkt_reformat_push;
+ struct mlx5_pkt_reformat *pkt_reformat_pop;
+ struct mlx5_modify_hdr *pkt_mod_hdr_push_mark;
+};
+
+struct mlx5_esw_bridge_port {
+ u16 vport_num;
+ u16 esw_owner_vhca_id;
+ u16 flags;
+ struct mlx5_esw_bridge *bridge;
+ struct xarray vlans;
+};
+
+#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
new file mode 100644
index 000000000..2db13c71e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/debugfs.h>
+#include "eswitch.h"
+
+enum vnic_diag_counter {
+ MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,
+ MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,
+ MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,
+ MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,
+ MLX5_VNIC_DIAG_CQ_OVERRUN,
+ MLX5_VNIC_DIAG_INVALID_COMMAND,
+ MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
+};
+
+static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
+ u32 *val)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+ struct mlx5_core_dev *dev = vport->dev;
+ u16 vport_num = vport->vport;
+ void *vnic_diag_out;
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
+ if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num))
+ MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env);
+ switch (counter) {
+ case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues);
+ break;
+ case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out,
+ send_queue_priority_update_flow);
+ break;
+ case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_CQ_OVERRUN:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun);
+ break;
+ case MLX5_VNIC_DIAG_INVALID_COMMAND:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command);
+ break;
+ case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
+ *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
+ break;
+ }
+
+ return 0;
+}
+
+static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
+ enum vnic_diag_counter type)
+{
+ u32 val = 0;
+ int ret;
+
+ ret = mlx5_esw_query_vnic_diag(vport, type, &val);
+ if (ret)
+ return ret;
+
+ seq_printf(file, "%d\n", val);
+ return 0;
+}
+
+static int total_q_under_processor_handle_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
+}
+
+static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private,
+ MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
+}
+
+static int comp_eq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
+}
+
+static int async_eq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
+}
+
+static int cq_overrun_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
+}
+
+static int invalid_command_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
+}
+
+static int quota_exceeded_command_show(struct seq_file *file, void *priv)
+{
+ return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
+}
+
+DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
+DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
+DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(cq_overrun);
+DEFINE_SHOW_ATTRIBUTE(invalid_command);
+DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ debugfs_remove_recursive(vport->dbgfs);
+ vport->dbgfs = NULL;
+}
+
+/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */
+#define VNIC_DIAG_DIR_NAME_MAX_LEN 8
+
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+ struct dentry *vnic_diag;
+ char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
+ int err;
+
+ if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return;
+
+ if (vport_num == MLX5_VPORT_PF) {
+ strcpy(dir_name, "pf");
+ } else if (vport_num == MLX5_VPORT_ECPF) {
+ strcpy(dir_name, "ecpf");
+ } else {
+ err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf",
+ is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
+ if (WARN_ON(err < 0))
+ return;
+ }
+
+ vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
+ vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);
+
+ if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
+ debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
+ &total_q_under_processor_handle_fops);
+ debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
+ &send_queue_priority_update_flow_fops);
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
+ debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
+ &comp_eq_overrun_fops);
+ debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
+ &async_eq_overrun_fops);
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
+ debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);
+
+ if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
+ debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
+ &invalid_command_fops);
+
+ if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
+ debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
+ &quota_exceeded_command_fops);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
new file mode 100644
index 000000000..9bc7be95d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd. */
+
+#include <linux/mlx5/driver.h>
+#include "eswitch.h"
+
+static void
+mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+ u64 parent_id;
+
+ parent_id = mlx5_query_nic_system_image_guid(dev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
+
+static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num == MLX5_VPORT_UPLINK ||
+ (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) ||
+ mlx5_eswitch_is_vf_vport(esw, vport_num);
+}
+
+static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct devlink_port_attrs attrs = {};
+ struct netdev_phys_item_id ppid = {};
+ struct devlink_port *dl_port;
+ u32 controller_num = 0;
+ bool external;
+ u16 pfnum;
+
+ dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL);
+ if (!dl_port)
+ return NULL;
+
+ mlx5_esw_get_port_parent_id(dev, &ppid);
+ pfnum = mlx5_get_dev_index(dev);
+ external = mlx5_core_is_ecpf_esw_manager(dev);
+ if (external)
+ controller_num = dev->priv.eswitch->offloads.host_number + 1;
+
+ if (vport_num == MLX5_VPORT_UPLINK) {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = pfnum;
+ memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+ attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_set(dl_port, &attrs);
+ } else if (vport_num == MLX5_VPORT_PF) {
+ memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external);
+ } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) {
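+		/* VF vports start right after the PF vport, so vport_num - 1 is the VF number */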
+ memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+ vport_num - 1, external);
+ }
+ return dl_port;
+}
+
+static void mlx5_esw_dl_port_free(struct devlink_port *dl_port)
+{
+ kfree(dl_port);
+}
+
+int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct devlink_port *dl_port;
+ unsigned int dl_port_index;
+ struct mlx5_vport *vport;
+ struct devlink *devlink;
+ int err;
+
+ if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+ return 0;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ dl_port = mlx5_esw_dl_port_alloc(esw, vport_num);
+ if (!dl_port)
+ return -ENOMEM;
+
+ devlink = priv_to_devlink(dev);
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
+ err = devl_port_register(devlink, dl_port, dl_port_index);
+ if (err)
+ goto reg_err;
+
+ err = devl_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
+ vport->dl_port = dl_port;
+ return 0;
+
+rate_err:
+ devl_port_unregister(dl_port);
+reg_err:
+ mlx5_esw_dl_port_free(dl_port);
+ return err;
+}
+
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+ return;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devl_rate_leaf_destroy(vport->dl_port);
+ }
+
+ devl_port_unregister(vport->dl_port);
+ mlx5_esw_dl_port_free(vport->dl_port);
+ vport->dl_port = NULL;
+}
+
+struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port;
+}
+
+int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct netdev_phys_item_id ppid = {};
+ unsigned int dl_port_index;
+ struct mlx5_vport *vport;
+ struct devlink *devlink;
+ u16 pfnum;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ pfnum = mlx5_get_dev_index(dev);
+ mlx5_esw_get_port_parent_id(dev, &ppid);
+ memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
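+	/* A non-zero controller number means the SF is owned by an external host controller */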
+ devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller);
+ devlink = priv_to_devlink(dev);
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
+ err = devl_port_register(devlink, dl_port, dl_port_index);
+ if (err)
+ return err;
+
+ err = devl_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
+ vport->dl_port = dl_port;
+ return 0;
+
+rate_err:
+ devl_port_unregister(dl_port);
+ return err;
+}
+
+void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devl_rate_leaf_destroy(vport->dl_port);
+ }
+
+ devl_port_unregister(vport->dl_port);
+ vport->dl_port = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
new file mode 100644
index 000000000..51ac24e6e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_BRIDGE_TRACEPOINT_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_BRIDGE_TRACEPOINT_
+
+#include <linux/tracepoint.h>
+#include "../bridge_priv.h"
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb),
+ TP_STRUCT__entry(
+ __array(char, dev_name, IFNAMSIZ)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __field(u16, flags)
+ __field(unsigned int, used)
+ ),
+ TP_fast_assign(
+ strscpy(__entry->dev_name,
+ netdev_name(fdb->dev),
+ IFNAMSIZ);
+ memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
+ __entry->vid = fdb->key.vid;
+ __entry->flags = fdb->flags;
+ __entry->used = jiffies_to_msecs(jiffies - fdb->lastuse)
+ ),
+ TP_printk("net_device=%s addr=%pM vid=%hu flags=%hx used=%u",
+ __entry->dev_name,
+ __entry->addr,
+ __entry->vid,
+ __entry->flags,
+ __entry->used / 1000)
+ );
+
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+ mlx5_esw_bridge_fdb_entry_init,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+ mlx5_esw_bridge_fdb_entry_refresh,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+ mlx5_esw_bridge_fdb_entry_cleanup,
+ TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+ TP_ARGS(fdb)
+ );
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_vlan_template,
+ TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+ TP_ARGS(vlan),
+ TP_STRUCT__entry(
+ __field(u16, vid)
+ __field(u16, flags)
+ ),
+ TP_fast_assign(
+ __entry->vid = vlan->vid;
+ __entry->flags = vlan->flags;
+ ),
+ TP_printk("vid=%hu flags=%hx",
+ __entry->vid,
+ __entry->flags)
+ );
+
+DEFINE_EVENT(mlx5_esw_bridge_vlan_template,
+ mlx5_esw_bridge_vlan_create,
+ TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+ TP_ARGS(vlan)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_vlan_template,
+ mlx5_esw_bridge_vlan_cleanup,
+ TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+ TP_ARGS(vlan)
+ );
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template,
+ TP_PROTO(const struct mlx5_esw_bridge_port *port),
+ TP_ARGS(port),
+ TP_STRUCT__entry(
+ __field(u16, vport_num)
+ __field(u16, esw_owner_vhca_id)
+ __field(u16, flags)
+ ),
+ TP_fast_assign(
+ __entry->vport_num = port->vport_num;
+ __entry->esw_owner_vhca_id = port->esw_owner_vhca_id;
+ __entry->flags = port->flags;
+ ),
+ TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx",
+ __entry->vport_num,
+ __entry->esw_owner_vhca_id,
+ __entry->flags)
+ );
+
+DEFINE_EVENT(mlx5_esw_bridge_port_template,
+ mlx5_esw_bridge_vport_init,
+ TP_PROTO(const struct mlx5_esw_bridge_port *port),
+ TP_ARGS(port)
+ );
+DEFINE_EVENT(mlx5_esw_bridge_port_template,
+ mlx5_esw_bridge_vport_cleanup,
+ TP_PROTO(const struct mlx5_esw_bridge_port *port),
+ TP_ARGS(port)
+ );
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE bridge_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
new file mode 100644
index 000000000..458baf0c6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_TP_
+
+#include <linux/tracepoint.h>
+#include "eswitch.h"
+
+TRACE_EVENT(mlx5_esw_vport_qos_destroy,
+ TP_PROTO(const struct mlx5_vport *vport),
+ TP_ARGS(vport),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix
+ )
+);
+
+DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ __field(void *, group)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ __entry->group = vport->qos.group;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate, __entry->group
+ )
+);
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix
+ )
+);
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+TRACE_EVENT(mlx5_esw_group_qos_config,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix, u32 bw_share, u32 max_rate),
+ TP_ARGS(dev, group, tsar_ix, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate
+ )
+);
+#endif /* _MLX5_ESW_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE qos_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
new file mode 100644
index 000000000..8a94870c5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "en.h"
+#include "en_tc.h"
+#include "fs_core.h"
+#include "esw/indir_table.h"
+#include "lib/fs_chains.h"
+#include "en/mod_hdr.h"
+
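+/* Each indirection table holds two FTEs: a recirc entry that recirculates
+ * decapped traffic with a rewritten source vport, and a fwd entry that
+ * forwards to the destination vport.
+ */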
+#define MLX5_ESW_INDIR_TABLE_SIZE 2
+#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
+#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
+
+struct mlx5_esw_indir_table_rule {
+ struct mlx5_flow_handle *handle;
+ struct mlx5_modify_hdr *mh;
+ refcount_t refcnt;
+};
+
+struct mlx5_esw_indir_table_entry {
+ struct hlist_node hlist;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *recirc_grp;
+ struct mlx5_flow_group *fwd_grp;
+ struct mlx5_flow_handle *fwd_rule;
+ struct mlx5_esw_indir_table_rule *recirc_rule;
+ int fwd_ref;
+
+ u16 vport;
+};
+
+struct mlx5_esw_indir_table {
+ struct mutex lock; /* protects table */
+ DECLARE_HASHTABLE(table, 8);
+};
+
+struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void)
+{
+ struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);
+
+ if (!indir)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&indir->lock);
+ hash_init(indir->table);
+ return indir;
+}
+
+void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
+{
+ mutex_destroy(&indir->lock);
+ kvfree(indir);
+}
+
+bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ bool vf_sf_vport;
+
+ vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) ||
+ mlx5_esw_is_sf_vport(esw, vport_num);
+
+	/* Use the indirect table for all IP traffic from the uplink to a
+	 * VF/SF vport destination when the source rewrite flag is set.
+ */
+ return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
+ vf_sf_vport &&
+ esw->dev == dest_mdev &&
+ attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
+}
+
+u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
+}
+
+static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_esw_indir_table_rule *rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+ int err = 0;
+ u32 data;
+
+ if (e->recirc_rule) {
+ refcount_inc(&e->recirc_rule->refcnt);
+ return 0;
+ }
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return -ENOMEM;
+
+ /* Modify flow source to recirculate packet */
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err)
+ goto err_mod_hdr_regc0;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
+ if (err)
+ goto err_mod_hdr_regc1;
+
+ flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts.num_actions, mod_acts.actions);
+ if (IS_ERR(flow_act.modify_hdr)) {
+ err = PTR_ERR(flow_act.modify_hdr);
+ goto err_mod_hdr_alloc;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
+ flow_act.fg = e->recirc_grp;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
+ if (IS_ERR(dest.ft)) {
+ err = PTR_ERR(dest.ft);
+ goto err_table;
+ }
+ handle = mlx5_add_flow_rules(e->ft, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto err_handle;
+ }
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ rule->handle = handle;
+ rule->mh = flow_act.modify_hdr;
+ refcount_set(&rule->refcnt, 1);
+ e->recirc_rule = rule;
+ return 0;
+
+err_handle:
+ mlx5_chains_put_table(chains, 0, 1, 0);
+err_table:
+ mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
+err_mod_hdr_alloc:
+err_mod_hdr_regc1:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+err_mod_hdr_regc0:
+ kfree(rule);
+ return err;
+}
+
+static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ struct mlx5_esw_indir_table_rule *rule = e->recirc_rule;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+
+ if (!rule)
+ return;
+
+ if (!refcount_dec_and_test(&rule->refcnt))
+ return;
+
+ mlx5_del_flow_rules(rule->handle);
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ mlx5_modify_header_dealloc(esw->dev, rule->mh);
+ kfree(rule);
+ e->recirc_rule = NULL;
+}
+
+static int mlx5_create_indir_recirc_group(struct mlx5_esw_indir_table_entry *e)
+{
+ int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX);
+ e->recirc_grp = mlx5_create_flow_group(e->ft, in);
+ if (IS_ERR(e->recirc_grp))
+ err = PTR_ERR(e->recirc_grp);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+	/* The fwd group holds a single entry */
+ MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
+ e->fwd_grp = mlx5_create_flow_group(e->ft, in);
+ if (IS_ERR(e->fwd_grp)) {
+ err = PTR_ERR(e->fwd_grp);
+ goto err_out;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.fg = e->fwd_grp;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = e->vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(e->fwd_rule)) {
+ mlx5_destroy_flow_group(e->fwd_grp);
+ err = PTR_ERR(e->fwd_rule);
+ }
+
+err_out:
+ kvfree(spec);
+ kvfree(in);
+ return err;
+}
+
+static struct mlx5_esw_indir_table_entry *
+mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
+ u16 vport, bool decap)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_esw_indir_table_entry *e;
+ struct mlx5_flow_table *ft;
+ int err = 0;
+
+ root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns)
+ return ERR_PTR(-ENOENT);
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+
+ ft_attr.prio = FDB_TC_OFFLOAD;
+ ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto tbl_err;
+ }
+ e->ft = ft;
+ e->vport = vport;
+ e->fwd_ref = !decap;
+
+ err = mlx5_create_indir_recirc_group(e);
+ if (err)
+ goto recirc_grp_err;
+
+ if (decap) {
+ err = mlx5_esw_indir_table_rule_get(esw, attr, e);
+ if (err)
+ goto recirc_rule_err;
+ }
+
+ err = mlx5_create_indir_fwd_group(esw, e);
+ if (err)
+ goto fwd_grp_err;
+
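+	/* The hash key is the vport number shifted into the upper 16 bits;
+	 * mlx5_esw_indir_table_entry_lookup() recomputes the same key.
+	 */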
+ hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
+ vport << 16);
+
+ return e;
+
+fwd_grp_err:
+ if (decap)
+ mlx5_esw_indir_table_rule_put(esw, e);
+recirc_rule_err:
+ mlx5_destroy_flow_group(e->recirc_grp);
+recirc_grp_err:
+ mlx5_destroy_flow_table(e->ft);
+tbl_err:
+ kfree(e);
+ return ERR_PTR(err);
+}
+
+static struct mlx5_esw_indir_table_entry *
+mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport)
+{
+ struct mlx5_esw_indir_table_entry *e;
+ u32 key = vport << 16;
+
+ hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
+ if (e->vport == vport)
+ return e;
+
+ return NULL;
+}
+
+struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap)
+{
+ struct mlx5_esw_indir_table_entry *e;
+ int err;
+
+ mutex_lock(&esw->fdb_table.offloads.indir->lock);
+ e = mlx5_esw_indir_table_entry_lookup(esw, vport);
+ if (e) {
+ if (!decap) {
+ e->fwd_ref++;
+ } else {
+ err = mlx5_esw_indir_table_rule_get(esw, attr, e);
+ if (err)
+ goto out_err;
+ }
+ } else {
+ e = mlx5_esw_indir_table_entry_create(esw, attr, vport, decap);
+ if (IS_ERR(e)) {
+ err = PTR_ERR(e);
+ esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
+ goto out_err;
+ }
+ }
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+ return e->ft;
+
+out_err:
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+ return ERR_PTR(err);
+}
+
+void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ u16 vport, bool decap)
+{
+ struct mlx5_esw_indir_table_entry *e;
+
+ mutex_lock(&esw->fdb_table.offloads.indir->lock);
+ e = mlx5_esw_indir_table_entry_lookup(esw, vport);
+ if (!e)
+ goto out;
+
+ if (!decap)
+ e->fwd_ref--;
+ else
+ mlx5_esw_indir_table_rule_put(esw, e);
+
+ if (e->fwd_ref || e->recirc_rule)
+ goto out;
+
+ hash_del(&e->hlist);
+ mlx5_destroy_flow_group(e->recirc_grp);
+ mlx5_del_flow_rules(e->fwd_rule);
+ mlx5_destroy_flow_group(e->fwd_grp);
+ mlx5_destroy_flow_table(e->ft);
+ kfree(e);
+out:
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
new file mode 100644
index 000000000..036f5b3a3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_ESW_FT_H__
+#define __MLX5_ESW_FT_H__
+
+#ifdef CONFIG_MLX5_CLS_ACT
+
+struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void);
+void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir);
+
+struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap);
+void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ u16 vport, bool decap);
+
+bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev);
+
+u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr);
+
+#else
+/* indir API stubs */
+static inline struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void)
+{
+ return NULL;
+}
+
+static inline void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
+{
+}
+
+static inline struct mlx5_flow_table *
+mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ u16 vport, bool decap)
+{
+}
+
+static inline bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev)
+{
+ return false;
+}
+
+static inline u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+#endif
+
+#endif /* __MLX5_ESW_FT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
new file mode 100644
index 000000000..fabe49a35
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "esw/acl/lgcy.h"
+#include "esw/legacy.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "fs_core.h"
+#include "fs_ft_pool.h"
+#include "esw/qos.h"
+
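+/* The VEPA table uses a lower priority than the legacy FDB table, so its
+ * rules are evaluated first when VEPA mode is enabled.
+ */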
+enum {
+ LEGACY_VEPA_PRIO = 0,
+ LEGACY_FDB_PRIO,
+};
+
+static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb;
+ int err;
+
+ root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* num FTE 2, num FG 2 */
+ ft_attr.prio = LEGACY_VEPA_PRIO;
+ ft_attr.max_fte = 2;
+ ft_attr.autogroup.max_num_groups = 2;
+ fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
+ return err;
+ }
+ esw->fdb_table.legacy.vepa_fdb = fdb;
+
+ return 0;
+}
+
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ esw_debug(esw->dev, "Destroy FDB Table\n");
+ if (!esw->fdb_table.legacy.fdb)
+ return;
+
+ if (esw->fdb_table.legacy.promisc_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+ if (esw->fdb_table.legacy.allmulti_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+ if (esw->fdb_table.legacy.addr_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+
+ esw->fdb_table.legacy.fdb = NULL;
+ esw->fdb_table.legacy.addr_grp = NULL;
+ esw->fdb_table.legacy.allmulti_grp = NULL;
+ esw->fdb_table.legacy.promisc_grp = NULL;
+ atomic64_set(&esw->user_count, 0);
+}
+
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int table_size;
+ u32 *flow_group_in;
+ u8 *dmac;
+ int err = 0;
+
+ esw_debug(dev, "Create FDB log_max_size(%d)\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ ft_attr.max_fte = POOL_NEXT_SIZE;
+ ft_attr.prio = LEGACY_FDB_PRIO;
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create FDB Table err %d\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.fdb = fdb;
+ table_size = fdb->max_fte;
+
+	/* Addresses group: full-match unicast/multicast addresses */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	/* Preserve 2 entries for the allmulti and promisc rules */
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
+ eth_broadcast_addr(dmac);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.addr_grp = g;
+
+	/* Allmulti group: one rule that forwards any mcast traffic */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2);
+ eth_zero_addr(dmac);
+ dmac[0] = 0x01;
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.allmulti_grp = g;
+
+	/* Promiscuous group:
+	 * One rule that forwards all traffic unmatched by the previous groups
+ */
+ eth_zero_addr(dmac);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.promisc_grp = g;
+
+out:
+ if (err)
+ esw_destroy_legacy_fdb_table(esw);
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+ esw_debug(esw->dev, "Destroy VEPA Table\n");
+ if (!esw->fdb_table.legacy.vepa_fdb)
+ return;
+
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb);
+ esw->fdb_table.legacy.vepa_fdb = NULL;
+}
+
+static int esw_create_legacy_table(struct mlx5_eswitch *esw)
+{
+ int err;
+
+ memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+ atomic64_set(&esw->user_count, 0);
+
+ err = esw_create_legacy_vepa_table(esw);
+ if (err)
+ return err;
+
+ err = esw_create_legacy_fdb_table(esw);
+ if (err)
+ esw_destroy_legacy_vepa_table(esw);
+
+ return err;
+}
+
+static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw)
+{
+ if (esw->fdb_table.legacy.vepa_uplink_rule)
+ mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule);
+
+ if (esw->fdb_table.legacy.vepa_star_rule)
+ mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule);
+
+ esw->fdb_table.legacy.vepa_uplink_rule = NULL;
+ esw->fdb_table.legacy.vepa_star_rule = NULL;
+}
+
+static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
+{
+ esw_cleanup_vepa_rules(esw);
+ esw_destroy_legacy_fdb_table(esw);
+ esw_destroy_legacy_vepa_table(esw);
+}
+
+#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
+ MLX5_VPORT_MC_ADDR_CHANGE | \
+ MLX5_VPORT_PROMISC_CHANGE)
+
+int esw_legacy_enable(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int ret;
+
+ ret = esw_create_legacy_table(esw);
+ if (ret)
+ return ret;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+
+ ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
+ if (ret)
+ esw_destroy_legacy_table(esw);
+ return ret;
+}
+
+void esw_legacy_disable(struct mlx5_eswitch *esw)
+{
+ struct esw_mc_addr *mc_promisc;
+
+ mlx5_eswitch_disable_pf_vf_vports(esw);
+
+ mc_promisc = &esw->mc_promisc;
+ if (mc_promisc->uplink_rule)
+ mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+ esw_destroy_legacy_table(esw);
+}
+
+static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
+ u8 setting)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
+
+ if (!setting) {
+ esw_cleanup_vepa_rules(esw);
+ return 0;
+ }
+
+ if (esw->fdb_table.legacy.vepa_uplink_rule)
+ return 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+	/* Uplink rule forwards uplink traffic to the FDB */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = esw->fdb_table.legacy.fdb;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ } else {
+ esw->fdb_table.legacy.vepa_uplink_rule = flow_rule;
+ }
+
+ /* Star rule to forward all traffic to uplink vport */
+ memset(&dest, 0, sizeof(dest));
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = MLX5_VPORT_UPLINK;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ } else {
+ esw->fdb_table.legacy.vepa_star_rule = flow_rule;
+ }
+
+out:
+ kvfree(spec);
+ if (err)
+ esw_cleanup_vepa_rules(esw);
+ return err;
+}
+
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
+{
+ int err = 0;
+
+ if (!esw)
+ return -EOPNOTSUPP;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = _mlx5_eswitch_set_vepa_locked(esw, setting);
+
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
+{
+ if (!esw)
+ return -EOPNOTSUPP;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ if (esw->mode != MLX5_ESWITCH_LEGACY)
+ return -EOPNOTSUPP;
+
+ *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
+ return 0;
+}
+
+int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int ret;
+
+	/* Only non-manager vports need ACLs in legacy mode */
+ if (mlx5_esw_is_manager_vport(esw, vport->vport))
+ return 0;
+
+ ret = esw_acl_ingress_lgcy_setup(esw, vport);
+ if (ret)
+ goto ingress_err;
+
+ ret = esw_acl_egress_lgcy_setup(esw, vport);
+ if (ret)
+ goto egress_err;
+
+ return 0;
+
+egress_err:
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ingress_err:
+ return ret;
+}
+
+void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ if (mlx5_esw_is_manager_vport(esw, vport->vport))
+ return;
+
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+}
+
+int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev,
+ struct mlx5_vport *vport,
+ struct mlx5_vport_drop_stats *stats)
+{
+ u64 rx_discard_vport_down, tx_discard_vport_down;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ u64 bytes = 0;
+ int err = 0;
+
+ if (esw->mode != MLX5_ESWITCH_LEGACY)
+ return 0;
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->enabled)
+ goto unlock;
+
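+	/* Drop counters are reported from the vport's point of view: egress
+	 * ACL drops count against RX, ingress ACL drops against TX.
+	 */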
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter))
+ mlx5_fc_query(dev, vport->egress.legacy.drop_counter,
+ &stats->rx_dropped, &bytes);
+
+ if (vport->ingress.legacy.drop_counter)
+ mlx5_fc_query(dev, vport->ingress.legacy.drop_counter,
+ &stats->tx_dropped, &bytes);
+
+ if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
+ !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ goto unlock;
+
+ err = mlx5_query_vport_down_stats(dev, vport->vport, 1,
+ &rx_discard_vport_down,
+ &tx_discard_vport_down);
+ if (err)
+ goto unlock;
+
+ if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
+ stats->rx_dropped += rx_discard_vport_down;
+ if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ stats->tx_dropped += tx_discard_vport_down;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ u16 vport, u16 vlan, u8 qos)
+{
+ u8 set_flags = 0;
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+ return vlan ? -EPERM : 0;
+
+ if (vlan || qos)
+ set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ if (!vlan)
+ goto unlock; /* compatibility with libvirt */
+
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+ u16 vport, bool spoofchk)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ bool pschk;
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ pschk = evport->info.spoofchk;
+ evport->info.spoofchk = spoofchk;
+ if (pschk && !is_valid_ether_addr(evport->info.mac))
+ mlx5_core_warn(esw->dev,
+ "Spoofchk in set while MAC is invalid, vport(%d)\n",
+ evport->vport);
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
+ if (err)
+ evport->info.spoofchk = pschk;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+ u16 vport, bool setting)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ evport->info.trusted = setting;
+ if (evport->enabled)
+ esw_vport_change_handle_locked(evport);
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
+ u32 max_rate, u32 min_rate)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h
new file mode 100644
index 000000000..e0820bb72
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_ESW_LEGACY_H__
+#define __MLX5_ESW_LEGACY_H__
+
+#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
+ MLX5_VPORT_MC_ADDR_CHANGE | \
+ MLX5_VPORT_PROMISC_CHANGE)
+
+struct mlx5_eswitch;
+
+int esw_legacy_enable(struct mlx5_eswitch *esw);
+void esw_legacy_disable(struct mlx5_eswitch *esw);
+
+int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev,
+ struct mlx5_vport *vport,
+ struct mlx5_vport_drop_stats *stats);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
new file mode 100644
index 000000000..75015d370
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -0,0 +1,943 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "eswitch.h"
+#include "esw/qos.h"
+#include "en/port.h"
+#define CREATE_TRACE_POINTS
+#include "diag/qos_tracepoint.h"
+
+/* The minimum BW share value supported by the HW is 1 Mbit/sec */
+#define MLX5_MIN_BW_SHARE 1
+
+#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
+ min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
+
+struct mlx5_esw_rate_group {
+ u32 tsar_ix;
+ u32 max_rate;
+ u32 min_rate;
+ u32 bw_share;
+ struct list_head list;
+};
+
+static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
+ u32 tsar_ix, u32 max_rate, u32 bw_share)
+{
+ u32 bitmask = 0;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+
+ return mlx5_modify_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ tsar_ix,
+ bitmask);
+}
+
+static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ int err;
+
+ err = esw_qos_tsar_config(dev, sched_ctx,
+ group->tsar_ix,
+ max_rate, bw_share);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
+
+ trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
+
+ return err;
+}
+
+static int esw_qos_vport_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share,
+ struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ int err;
+
+ if (!vport->qos.enabled)
+ return -EIO;
+
+ err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
+ max_rate, bw_share);
+ if (err) {
+ esw_warn(esw->dev,
+ "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
+ return err;
+ }
+
+ trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
+
+ return 0;
+}
+
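+/* The divider scales min_rate into bw_share so that the highest configured
+ * guarantee maps to the firmware's maximum bw_share value.
+ */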
+static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ bool group_level)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_vport *evport;
+ u32 max_guarantee = 0;
+ unsigned long i;
+
+ if (group_level) {
+ struct mlx5_esw_rate_group *group;
+
+ list_for_each_entry(group, &esw->qos.groups, list) {
+ if (group->min_rate < max_guarantee)
+ continue;
+ max_guarantee = group->min_rate;
+ }
+ } else {
+ mlx5_esw_for_each_vport(esw, i, evport) {
+ if (!evport->enabled || !evport->qos.enabled ||
+ evport->qos.group != group || evport->qos.min_rate < max_guarantee)
+ continue;
+ max_guarantee = evport->qos.min_rate;
+ }
+ }
+
+ if (max_guarantee)
+ return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+	/* If the vports' min rate divider is 0 but their group has bw_share
+	 * configured, the vports' bw_share must be set to the minimal value.
+ */
+ if (!group_level && !max_guarantee && group && group->bw_share)
+ return 1;
+ return 0;
+}
+
+static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+{
+ if (divider)
+ return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
+
+ return 0;
+}
+
+static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
+ struct mlx5_vport *evport;
+ unsigned long i;
+ u32 bw_share;
+ int err;
+
+ mlx5_esw_for_each_vport(esw, i, evport) {
+ if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
+ continue;
+ bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == evport->qos.bw_share)
+ continue;
+
+ err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ evport->qos.bw_share = bw_share;
+ }
+
+ return 0;
+}
+
+static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_esw_rate_group *group;
+ u32 bw_share;
+ int err;
+
+ list_for_each_entry(group, &esw->qos.groups, list) {
+ bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == group->bw_share)
+ continue;
+
+ err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ group->bw_share = bw_share;
+
+		/* All of the group's vports need the default bw_share set
+		 * so that QoS is enabled for them.
+ */
+ err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 min_rate, struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share, previous_min_rate;
+ bool min_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+ fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+ if (min_rate && !min_rate_supported)
+ return -EOPNOTSUPP;
+ if (min_rate == evport->qos.min_rate)
+ return 0;
+
+ previous_min_rate = evport->qos.min_rate;
+ evport->qos.min_rate = min_rate;
+ err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
+ if (err)
+ evport->qos.min_rate = previous_min_rate;
+
+ return err;
+}
+
+static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 max_rate, struct netlink_ext_ack *extack)
+{
+ u32 act_max_rate = max_rate;
+ bool max_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
+ if (max_rate && !max_rate_supported)
+ return -EOPNOTSUPP;
+ if (max_rate == evport->qos.max_rate)
+ return 0;
+
+	/* If the parent group has a rate limit, fall back to the group's
+	 * value when the new max rate is 0.
+ */
+ if (evport->qos.group && !max_rate)
+ act_max_rate = evport->qos.group->max_rate;
+
+ err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
+
+ if (!err)
+ evport->qos.max_rate = max_rate;
+
+ return err;
+}
+
+static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 min_rate, struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 previous_min_rate, divider;
+ int err;
+
+ if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
+ return -EOPNOTSUPP;
+
+ if (min_rate == group->min_rate)
+ return 0;
+
+ previous_min_rate = group->min_rate;
+ group->min_rate = min_rate;
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err) {
+ group->min_rate = previous_min_rate;
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
+
+ /* Attempt restoring previous configuration */
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ u32 max_rate, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ if (group->max_rate == max_rate)
+ return 0;
+
+ err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
+ if (err)
+ return err;
+
+ group->max_rate = max_rate;
+
+ /* Any unlimited vports in the group should be set
+	 * to the group's value.
+ */
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport->enabled || !vport->qos.enabled ||
+ vport->qos.group != group || vport->qos.max_rate)
+ continue;
+
+ err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch vport implicit rate limit setting failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
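+	/* Attach the vport element under its rate group's TSAR, or under the
+	 * root TSAR if the vport has no group.
+	 */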
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *curr_group,
+ struct mlx5_esw_rate_group *new_group,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_rate;
+ int err;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+ return err;
+ }
+
+ vport->qos.group = new_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
+
+	/* If the vport is unlimited, we use the group's value: if the group
+	 * is limited, its limit applies to the vport as well; if not, the
+	 * vport remains unlimited.
+ */
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ vport->qos.group = curr_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
+ if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
+ esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+ vport->vport);
+
+ return err;
+}
+
+static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *new_group, *curr_group;
+ int err;
+
+ if (!vport->enabled)
+ return -EINVAL;
+
+ curr_group = vport->qos.group;
+ new_group = group ?: esw->qos.group0;
+ if (curr_group == new_group)
+ return 0;
+
+ err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+ if (err)
+ return err;
+
+ /* Recalculate bw share weights of old and new groups */
+ if (vport->qos.bw_share || new_group->bw_share) {
+ esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+ esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+ }
+
+ return 0;
+}
+
+static struct mlx5_esw_rate_group *
+__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group;
+ u32 divider;
+ int err;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+ esw->qos.root_tsar_ix);
+ err = mlx5_create_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &group->tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
+ goto err_sched_elem;
+ }
+
+ list_add_tail(&group->list, &esw->qos.groups);
+
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ if (divider) {
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
+ goto err_min_rate;
+ }
+ }
+ trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
+
+ return group;
+
+err_min_rate:
+ list_del(&group->list);
+ if (mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ group->tsar_ix))
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
+err_sched_elem:
+ kfree(group);
+ return ERR_PTR(err);
+}
+
+static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
+static void esw_qos_put(struct mlx5_eswitch *esw);
+
+static struct mlx5_esw_rate_group *
+esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ int err;
+
+ if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = esw_qos_get(esw, extack);
+ if (err)
+ return ERR_PTR(err);
+
+ group = __esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(group))
+ esw_qos_put(esw);
+
+ return group;
+}
+
+static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 divider;
+ int err;
+
+ list_del(&group->list);
+
+ divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ group->tsar_ix);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
+
+ trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
+
+ kfree(group);
+
+ return err;
+}
+
+static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = __esw_qos_destroy_rate_group(esw, group, extack);
+ esw_qos_put(esw);
+
+ return err;
+}
+
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+ switch (type) {
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_TASR;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+ }
+ return false;
+}
+
+static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ __be32 *attr;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, tsar_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+ attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+ *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &esw->qos.root_tsar_ix);
+ if (err) {
+ esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
+ return err;
+ }
+
+ INIT_LIST_HEAD(&esw->qos.groups);
+ if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
+ esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(esw->qos.group0)) {
+ esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
+ PTR_ERR(esw->qos.group0));
+ err = PTR_ERR(esw->qos.group0);
+ goto err_group0;
+ }
+ }
+ refcount_set(&esw->qos.refcnt, 1);
+
+ return 0;
+
+err_group0:
+ if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix))
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
+
+ return err;
+}
+
+static void esw_qos_destroy(struct mlx5_eswitch *esw)
+{
+ int err;
+
+ if (esw->qos.group0)
+ __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+}
+
+static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ lockdep_assert_held(&esw->state_lock);
+
+ if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
+ /* esw_qos_create() sets the refcount to 1 only on success.
+ * No need to decrement on failure.
+ */
+ err = esw_qos_create(esw, extack);
+ }
+
+ return err;
+}
+
+static void esw_qos_put(struct mlx5_eswitch *esw)
+{
+ lockdep_assert_held(&esw->state_lock);
+ if (refcount_dec_and_test(&esw->qos.refcnt))
+ esw_qos_destroy(esw);
+}
+
+static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (vport->qos.enabled)
+ return 0;
+
+ err = esw_qos_get(esw, extack);
+ if (err)
+ return err;
+
+ vport->qos.group = esw->qos.group0;
+
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+ if (err)
+ goto err_out;
+
+ vport->qos.enabled = true;
+ trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
+
+ return 0;
+
+err_out:
+ esw_qos_put(esw);
+
+ return err;
+}
+
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (!vport->qos.enabled)
+ return;
+ WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+ "Disabling QoS on port before detaching it from group");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+
+ memset(&vport->qos, 0, sizeof(vport->qos));
+ trace_mlx5_esw_vport_qos_destroy(vport);
+
+ esw_qos_put(esw);
+}
+
+int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 min_rate)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
+ if (err)
+ return err;
+
+ err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
+ if (!err)
+ err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
+
+ return err;
+}
+
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
+{
+ u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_vport *vport;
+ u32 bitmask;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->qos.enabled) {
+ /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
+ err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
+ } else {
+ MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+
+ bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ err = mlx5_modify_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ ctx,
+ vport->qos.esw_tsar_ix,
+ bitmask);
+ }
+ mutex_unlock(&esw->state_lock);
+
+ return err;
+}
+
+#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
+
+/* Converts a bytes-per-second value passed in a pointer into megabits per
+ * second, rewriting the value in place. Returns an error if the converted
+ * rate exceeds the link speed or is not a whole multiple of 1 Mbps.
+ */
+static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
+ u64 *rate, struct netlink_ext_ack *extack)
+{
+ u32 link_speed_max, reminder;
+ u64 value;
+ int err;
+
+ err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
+ return err;
+ }
+
+ value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
+ if (reminder) {
+ pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
+ name, *rate);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
+ return -EINVAL;
+ }
+
+ if (value > link_speed_max) {
+ pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
+ name, value, link_speed_max);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
+ return -EINVAL;
+ }
+
+ *rate = value;
+ return 0;
+}
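For reference, 1 Mbit/s equals 125000 bytes/s, so a devlink rate given in bytes
per second is accepted only when it divides evenly by MLX5_LINKSPEED_UNIT. A
minimal sketch of the same conversion in isolation (illustrative only, the
input values are arbitrary):

	u32 rem;
	u64 mbps = div_u64_rem(1250000000ULL, MLX5_LINKSPEED_UNIT, &rem);
	/* mbps == 10000 (10 Gbit/s), rem == 0, so the value is accepted.
	 * 1250000500 Bps would leave rem == 500 and be rejected with -EINVAL
	 * by esw_qos_devlink_rate_to_mbps() above.
	 */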
+
+/* Eswitch devlink rate API */
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (err)
+ goto unlock;
+
+ err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (err)
+ goto unlock;
+
+ err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_eswitch *esw;
+ int err = 0;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Rate node creation supported only in switchdev mode");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ group = esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ goto unlock;
+ }
+
+ *priv = group;
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_destroy_rate_group(esw, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->qos.enabled && !group)
+ goto unlock;
+
+ err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+ if (!err)
+ err = esw_qos_vport_update_group(esw, vport, group, extack);
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_vport *vport = priv;
+
+ if (!parent)
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+ vport, NULL, extack);
+
+ group = parent_priv;
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+}
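The exported callbacks above implement the devlink rate object interface
(devlink port function rate add/set from user space). Their registration is
not part of this hunk; a minimal sketch of the expected wiring, assuming the
driver's devlink_ops table lives elsewhere (the table name here is a
placeholder), would look like:

	static const struct devlink_ops example_devlink_ops = {
		/* leaf (vport) rate limits */
		.rate_leaf_tx_share_set	= mlx5_esw_devlink_rate_leaf_tx_share_set,
		.rate_leaf_tx_max_set	= mlx5_esw_devlink_rate_leaf_tx_max_set,
		/* node (rate group) lifetime and limits */
		.rate_node_new		= mlx5_esw_devlink_rate_node_new,
		.rate_node_del		= mlx5_esw_devlink_rate_node_del,
		.rate_node_tx_share_set	= mlx5_esw_devlink_rate_node_tx_share_set,
		.rate_node_tx_max_set	= mlx5_esw_devlink_rate_node_tx_max_set,
		/* attach/detach a vport leaf to/from a group node */
		.rate_leaf_parent_set	= mlx5_esw_devlink_rate_parent_set,
	};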
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
new file mode 100644
index 000000000..0141e9d52
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_ESW_QOS_H__
+#define __MLX5_ESW_QOS_H__
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+ u32 max_rate, u32 min_rate);
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack);
+#endif
+
+#endif
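mlx5_esw_qos_set_vport_rate() and mlx5_esw_qos_vport_disable() expect
esw->state_lock to be held by the caller (qos.c asserts this with
lockdep_assert_held()). A minimal hypothetical caller, using only helpers
visible in this patch (the function name is a placeholder):

	static int example_set_vf_rate(struct mlx5_eswitch *esw, u16 vport_num,
				       u32 max_rate, u32 min_rate)
	{
		struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
		int err;

		if (IS_ERR(vport))
			return PTR_ERR(vport);

		mutex_lock(&esw->state_lock);
		err = mlx5_esw_qos_set_vport_rate(esw, vport, max_rate, min_rate);
		mutex_unlock(&esw->state_lock);
		return err;
	}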
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
new file mode 100644
index 000000000..749c3957a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 Mellanox Technologies.
+
+#include "eswitch.h"
+
+/* This struct is used as a key to the hash table and we need it to be packed
+ * so hash result is consistent
+ */
+struct mlx5_vport_key {
+ u32 chain;
+ u16 prio;
+ u16 vport;
+ u16 vhca_id;
+ struct esw_vport_tbl_namespace *vport_ns;
+} __packed;
+
+struct mlx5_vport_table {
+ struct hlist_node hlist;
+ struct mlx5_flow_table *fdb;
+ u32 num_rules;
+ struct mlx5_vport_key key;
+};
+
+static void
+esw_vport_tbl_init(struct mlx5_eswitch *esw, struct esw_vport_tbl_namespace *ns)
+{
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ ns->flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+}
+
+static struct mlx5_flow_table *
+esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns,
+ const struct esw_vport_tbl_namespace *vport_ns)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *fdb;
+
+ if (vport_ns->max_num_groups)
+ ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups;
+ else
+ ft_attr.autogroup.max_num_groups = esw->params.large_group_num;
+ ft_attr.max_fte = vport_ns->max_fte;
+ ft_attr.prio = FDB_PER_VPORT;
+ ft_attr.flags = vport_ns->flags;
+ fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n",
+ PTR_ERR(fdb));
+ }
+
+ return fdb;
+}
+
+static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw,
+ struct mlx5_vport_tbl_attr *attr,
+ struct mlx5_vport_key *key)
+{
+ key->vport = attr->vport;
+ key->chain = attr->chain;
+ key->prio = attr->prio;
+ key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ key->vport_ns = attr->vport_ns;
+ return jhash(key, sizeof(*key), 0);
+}
+
+/* caller must hold vports.lock */
+static struct mlx5_vport_table *
+esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key)
+{
+ struct mlx5_vport_table *e;
+
+ hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key)
+ if (!memcmp(&e->key, skey, sizeof(*skey)))
+ return e;
+
+ return NULL;
+}
+
+struct mlx5_flow_table *
+mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_vport_table *e;
+ struct mlx5_vport_key skey;
+ u32 hkey;
+
+ mutex_lock(&esw->fdb_table.offloads.vports.lock);
+ esw_vport_tbl_init(esw, attr->vport_ns);
+ hkey = flow_attr_to_vport_key(esw, attr, &skey);
+ e = esw_vport_tbl_lookup(esw, &skey, hkey);
+ if (e) {
+ e->num_rules++;
+ goto out;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ fdb = ERR_PTR(-ENOMEM);
+ goto err_alloc;
+ }
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!ns) {
+ esw_warn(dev, "Failed to get FDB namespace\n");
+ fdb = ERR_PTR(-ENOENT);
+ goto err_ns;
+ }
+
+ fdb = esw_vport_tbl_create(esw, ns, attr->vport_ns);
+ if (IS_ERR(fdb))
+ goto err_ns;
+
+ e->fdb = fdb;
+ e->num_rules = 1;
+ e->key = skey;
+ hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey);
+out:
+ mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+ return e->fdb;
+
+err_ns:
+ kfree(e);
+err_alloc:
+ mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+ return fdb;
+}
+
+void
+mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+{
+ struct mlx5_vport_table *e;
+ struct mlx5_vport_key key;
+ u32 hkey;
+
+ mutex_lock(&esw->fdb_table.offloads.vports.lock);
+ esw_vport_tbl_init(esw, attr->vport_ns);
+ hkey = flow_attr_to_vport_key(esw, attr, &key);
+ e = esw_vport_tbl_lookup(esw, &key, hkey);
+ if (!e || --e->num_rules)
+ goto out;
+
+ hash_del(&e->hlist);
+ mlx5_destroy_flow_table(e->fdb);
+ kfree(e);
+out:
+ mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+}
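The get/put pair above is reference counted: repeated gets with the same
{chain, prio, vport, namespace} key return the same per-vport FDB table, and
only the final put destroys it. A minimal hypothetical usage sketch (the field
values and names below are placeholders chosen for illustration):

	static struct esw_vport_tbl_namespace example_ns = {
		.max_fte	= 16,	/* arbitrary size for this sketch */
		.max_num_groups	= 0,	/* 0 falls back to esw->params.large_group_num */
		.flags		= 0,
	};

	static int example_use_vport_table(struct mlx5_eswitch *esw, u16 vport_num)
	{
		struct mlx5_vport_tbl_attr attr = {
			.chain		= 0,
			.prio		= 1,
			.vport		= vport_num,
			.vport_ns	= &example_ns,
		};
		struct mlx5_flow_table *fdb;

		fdb = mlx5_esw_vporttbl_get(esw, &attr);
		if (IS_ERR(fdb))
			return PTR_ERR(fdb);

		/* ... add flow rules to fdb ... */

		mlx5_esw_vporttbl_put(esw, &attr);
		return 0;
	}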
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
new file mode 100644
index 000000000..48939c72b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -0,0 +1,2075 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/mpfs.h>
+#include <linux/debugfs.h>
+#include "esw/acl/lgcy.h"
+#include "esw/legacy.h"
+#include "esw/qos.h"
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "eswitch.h"
+#include "fs_core.h"
+#include "devlink.h"
+#include "ecpf.h"
+#include "en/mod_hdr.h"
+
+enum {
+ MLX5_ACTION_NONE = 0,
+ MLX5_ACTION_ADD = 1,
+ MLX5_ACTION_DEL = 2,
+};
+
+/* Vport UC/MC hash node */
+struct vport_addr {
+ struct l2addr_node node;
+ u8 action;
+ u16 vport;
+ struct mlx5_flow_handle *flow_rule;
+ bool mpfs; /* UC MAC was added to MPFS */
+ /* A flag indicating that mac was added due to mc promiscuous vport */
+ bool mc_promisc;
+};
+
+static int mlx5_eswitch_check(const struct mlx5_core_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EOPNOTSUPP;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
+
+ err = mlx5_eswitch_check(dev);
+ if (err)
+ return ERR_PTR(err);
+
+ return dev->priv.eswitch;
+}
+
+struct mlx5_vport *__must_check
+mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return ERR_PTR(-EPERM);
+
+ vport = xa_load(&esw->vports, vport_num);
+ if (!vport) {
+ esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num);
+ return ERR_PTR(-EINVAL);
+ }
+ return vport;
+}
+
+static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
+ u32 events_mask)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {};
+ void *nic_vport_ctx;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1);
+
+ if (events_mask & MLX5_VPORT_UC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_uc_address_change, 1);
+ if (events_mask & MLX5_VPORT_MC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_mc_address_change, 1);
+ if (events_mask & MLX5_VPORT_PROMISC_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_promisc_change, 1);
+
+ return mlx5_cmd_exec_in(dev, modify_nic_vport_context, in);
+}
+
+/* E-Switch vport context HW commands */
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
+ bool other_vport, void *in)
+{
+ MLX5_SET(modify_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
+ MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport);
+ return mlx5_cmd_exec_in(dev, modify_esw_vport_context, in);
+}
+
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
+ u16 vlan, u8 qos, u8 set_flags)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+
+ if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
+ !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
+ return -EOPNOTSUPP;
+
+ esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n",
+ vport, vlan, qos, set_flags);
+
+ if (set_flags & SET_VLAN_STRIP)
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_strip, 1);
+
+ if (set_flags & SET_VLAN_INSERT) {
+ if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) {
+ /* insert whether or not a vlan exists in the packet */
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_insert,
+ MLX5_VPORT_CVLAN_INSERT_ALWAYS);
+ } else {
+ /* insert only if no vlan in packet */
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_insert,
+ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN);
+ }
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_pcp, qos);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_id, vlan);
+ }
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_strip, 1);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_insert, 1);
+
+ return mlx5_eswitch_modify_esw_vport_context(dev, vport, true, in);
+}
+
+/* E-Switch FDB */
+static struct mlx5_flow_handle *
+__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule,
+ u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
+{
+ int match_header = (is_zero_ether_addr(mac_c) ? 0 :
+ MLX5_MATCH_OUTER_HEADERS);
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_spec *spec;
+ void *mv_misc = NULL;
+ void *mc_misc = NULL;
+ u8 *dmac_v = NULL;
+ u8 *dmac_c = NULL;
+
+ if (rx_rule)
+ match_header |= MLX5_MATCH_MISC_PARAMETERS;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dmac_47_16);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dmac_47_16);
+
+ if (match_header & MLX5_MATCH_OUTER_HEADERS) {
+ ether_addr_copy(dmac_v, mac_v);
+ ether_addr_copy(dmac_c, mac_c);
+ }
+
+ if (match_header & MLX5_MATCH_MISC_PARAMETERS) {
+ mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK);
+ MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
+ }
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = vport;
+
+ esw_debug(esw->dev,
+ "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
+ dmac_v, dmac_c, vport);
+ spec->match_criteria_enable = match_header;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule =
+ mlx5_add_flow_rules(esw->fdb_table.legacy.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev,
+ "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+ dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
+ flow_rule = NULL;
+ }
+
+ kvfree(spec);
+ return flow_rule;
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport)
+{
+ u8 mac_c[ETH_ALEN];
+
+ eth_broadcast_addr(mac_c);
+ return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac);
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport)
+{
+ u8 mac_c[ETH_ALEN];
+ u8 mac_v[ETH_ALEN];
+
+ eth_zero_addr(mac_c);
+ eth_zero_addr(mac_v);
+ mac_c[0] = 0x01;
+ mac_v[0] = 0x01;
+ return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v);
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport)
+{
+ u8 mac_c[ETH_ALEN];
+ u8 mac_v[ETH_ALEN];
+
+ eth_zero_addr(mac_c);
+ eth_zero_addr(mac_v);
+ return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
+}
+
+/* E-Switch vport UC/MC lists management */
+typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
+ struct vport_addr *vaddr);
+
+static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ u8 *mac = vaddr->node.addr;
+ u16 vport = vaddr->vport;
+ int err;
+
+ /* Skip mlx5_mpfs_add_mac for eswitch managers,
+ * it is already done by its netdev in mlx5e_execute_l2_action
+ */
+ if (mlx5_esw_is_manager_vport(esw, vport))
+ goto fdb_add;
+
+ err = mlx5_mpfs_add_mac(esw->dev, mac);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n",
+ mac, vport, err);
+ return err;
+ }
+ vaddr->mpfs = true;
+
+fdb_add:
+ /* SRIOV is enabled: Forward UC MAC to vport */
+ if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY)
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+
+ esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
+ vport, mac, vaddr->flow_rule);
+
+ return 0;
+}
+
+static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ u8 *mac = vaddr->node.addr;
+ u16 vport = vaddr->vport;
+ int err = 0;
+
+ /* Skip mlx5_mpfs_del_mac for eswitch managers,
+ * it is already done by its netdev in mlx5e_execute_l2_action
+ */
+ if (!vaddr->mpfs || mlx5_esw_is_manager_vport(esw, vport))
+ goto fdb_del;
+
+ err = mlx5_mpfs_del_mac(esw->dev, mac);
+ if (err)
+ esw_warn(esw->dev,
+ "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n",
+ mac, vport, err);
+ vaddr->mpfs = false;
+
+fdb_del:
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rules(vaddr->flow_rule);
+ vaddr->flow_rule = NULL;
+
+ return 0;
+}
+
+static void update_allmulti_vports(struct mlx5_eswitch *esw,
+ struct vport_addr *vaddr,
+ struct esw_mc_addr *esw_mc)
+{
+ u8 *mac = vaddr->node.addr;
+ struct mlx5_vport *vport;
+ unsigned long i;
+ u16 vport_num;
+
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ struct hlist_head *vport_hash = vport->mc_list;
+ struct vport_addr *iter_vaddr =
+ l2addr_hash_find(vport_hash,
+ mac,
+ struct vport_addr);
+ vport_num = vport->vport;
+ if (IS_ERR_OR_NULL(vport->allmulti_rule) ||
+ vaddr->vport == vport_num)
+ continue;
+ switch (vaddr->action) {
+ case MLX5_ACTION_ADD:
+ if (iter_vaddr)
+ continue;
+ iter_vaddr = l2addr_hash_add(vport_hash, mac,
+ struct vport_addr,
+ GFP_KERNEL);
+ if (!iter_vaddr) {
+ esw_warn(esw->dev,
+ "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n",
+ mac, vport_num);
+ continue;
+ }
+ iter_vaddr->vport = vport_num;
+ iter_vaddr->flow_rule =
+ esw_fdb_set_vport_rule(esw,
+ mac,
+ vport_num);
+ iter_vaddr->mc_promisc = true;
+ break;
+ case MLX5_ACTION_DEL:
+ if (!iter_vaddr)
+ continue;
+ mlx5_del_flow_rules(iter_vaddr->flow_rule);
+ l2addr_hash_del(iter_vaddr);
+ break;
+ }
+ }
+}
+
+static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u16 vport = vaddr->vport;
+
+ if (!esw->fdb_table.legacy.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (esw_mc)
+ goto add;
+
+ esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL);
+ if (!esw_mc)
+ return -ENOMEM;
+
+ esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
+ esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK);
+
+ /* Add this multicast mac to all the mc promiscuous vports */
+ update_allmulti_vports(esw, vaddr, esw_mc);
+
+add:
+ /* If the multicast mac is added as a result of mc promiscuous vport,
+ * don't increment the multicast ref count
+ */
+ if (!vaddr->mc_promisc)
+ esw_mc->refcnt++;
+
+ /* Forward MC MAC to vport */
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+ esw_debug(esw->dev,
+ "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule,
+ esw_mc->refcnt, esw_mc->uplink_rule);
+ return 0;
+}
+
+static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u16 vport = vaddr->vport;
+
+ if (!esw->fdb_table.legacy.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (!esw_mc) {
+ esw_warn(esw->dev,
+ "Failed to find eswitch MC addr for MAC(%pM) vport(%d)",
+ mac, vport);
+ return -EINVAL;
+ }
+ esw_debug(esw->dev,
+ "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule, esw_mc->refcnt,
+ esw_mc->uplink_rule);
+
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rules(vaddr->flow_rule);
+ vaddr->flow_rule = NULL;
+
+ /* If the multicast mac is added as a result of mc promiscuous vport,
+ * don't decrement the multicast ref count.
+ */
+ if (vaddr->mc_promisc || (--esw_mc->refcnt > 0))
+ return 0;
+
+ /* Remove this multicast mac from all the mc promiscuous vports */
+ update_allmulti_vports(esw, vaddr, esw_mc);
+
+ if (esw_mc->uplink_rule)
+ mlx5_del_flow_rules(esw_mc->uplink_rule);
+
+ l2addr_hash_del(esw_mc);
+ return 0;
+}
+
+/* Apply vport UC/MC list to HW l2 table and FDB table */
+static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport, int list_type)
+{
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ vport_addr_action vport_addr_add;
+ vport_addr_action vport_addr_del;
+ struct vport_addr *addr;
+ struct l2addr_node *node;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int hi;
+
+ vport_addr_add = is_uc ? esw_add_uc_addr :
+ esw_add_mc_addr;
+ vport_addr_del = is_uc ? esw_del_uc_addr :
+ esw_del_mc_addr;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ switch (addr->action) {
+ case MLX5_ACTION_ADD:
+ vport_addr_add(esw, addr);
+ addr->action = MLX5_ACTION_NONE;
+ break;
+ case MLX5_ACTION_DEL:
+ vport_addr_del(esw, addr);
+ l2addr_hash_del(addr);
+ break;
+ }
+ }
+}
+
+/* Sync vport UC/MC list from vport context */
+static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport, int list_type)
+{
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ u8 (*mac_list)[ETH_ALEN];
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int size;
+ int err;
+ int hi;
+ int i;
+
+ size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) :
+ MLX5_MAX_MC_PER_VPORT(esw->dev);
+
+ mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!mac_list)
+ return;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ addr->action = MLX5_ACTION_DEL;
+ }
+
+ if (!vport->enabled)
+ goto out;
+
+ err = mlx5_query_nic_vport_mac_list(esw->dev, vport->vport, list_type,
+ mac_list, &size);
+ if (err)
+ goto out;
+ esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n",
+ vport->vport, is_uc ? "UC" : "MC", size);
+
+ for (i = 0; i < size; i++) {
+ if (is_uc && !is_valid_ether_addr(mac_list[i]))
+ continue;
+
+ if (!is_uc && !is_multicast_ether_addr(mac_list[i]))
+ continue;
+
+ addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr);
+ if (addr) {
+ addr->action = MLX5_ACTION_NONE;
+ /* If this mac was previously added because of allmulti
+ * promiscuous rx mode, it is now converted to be a regular
+ * vport mac.
+ */
+ if (addr->mc_promisc) {
+ struct esw_mc_addr *esw_mc =
+ l2addr_hash_find(esw->mc_table,
+ mac_list[i],
+ struct esw_mc_addr);
+ if (!esw_mc) {
+ esw_warn(esw->dev,
+ "Failed to find MAC(%pM) in mcast DB\n",
+ mac_list[i]);
+ continue;
+ }
+ esw_mc->refcnt++;
+ addr->mc_promisc = false;
+ }
+ continue;
+ }
+
+ addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ esw_warn(esw->dev,
+ "Failed to add MAC(%pM) to vport[%d] DB\n",
+ mac_list[i], vport->vport);
+ continue;
+ }
+ addr->vport = vport->vport;
+ addr->action = MLX5_ACTION_ADD;
+ }
+out:
+ kfree(mac_list);
+}
+
+/* Sync the vport's allmulti (MC promiscuous) list from the eswitch MC table.
+ * Must be called after esw_update_vport_addr_list
+ */
+static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int hi;
+
+ hash = vport->mc_list;
+
+ for_each_l2hash_node(node, tmp, esw->mc_table, hi) {
+ u8 *mac = node->addr;
+
+ addr = l2addr_hash_find(hash, mac, struct vport_addr);
+ if (addr) {
+ if (addr->action == MLX5_ACTION_DEL)
+ addr->action = MLX5_ACTION_NONE;
+ continue;
+ }
+ addr = l2addr_hash_add(hash, mac, struct vport_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ esw_warn(esw->dev,
+ "Failed to add allmulti MAC(%pM) to vport[%d] DB\n",
+ mac, vport->vport);
+ continue;
+ }
+ addr->vport = vport->vport;
+ addr->action = MLX5_ACTION_ADD;
+ addr->mc_promisc = true;
+ }
+}
+
+/* Apply vport rx mode to HW FDB table */
+static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ bool promisc, bool mc_promisc)
+{
+ struct esw_mc_addr *allmulti_addr = &esw->mc_promisc;
+
+ if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc)
+ goto promisc;
+
+ if (mc_promisc) {
+ vport->allmulti_rule =
+ esw_fdb_set_vport_allmulti_rule(esw, vport->vport);
+ if (!allmulti_addr->uplink_rule)
+ allmulti_addr->uplink_rule =
+ esw_fdb_set_vport_allmulti_rule(esw,
+ MLX5_VPORT_UPLINK);
+ allmulti_addr->refcnt++;
+ } else if (vport->allmulti_rule) {
+ mlx5_del_flow_rules(vport->allmulti_rule);
+ vport->allmulti_rule = NULL;
+
+ if (--allmulti_addr->refcnt > 0)
+ goto promisc;
+
+ if (allmulti_addr->uplink_rule)
+ mlx5_del_flow_rules(allmulti_addr->uplink_rule);
+ allmulti_addr->uplink_rule = NULL;
+ }
+
+promisc:
+ if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc)
+ return;
+
+ if (promisc) {
+ vport->promisc_rule =
+ esw_fdb_set_vport_promisc_rule(esw, vport->vport);
+ } else if (vport->promisc_rule) {
+ mlx5_del_flow_rules(vport->promisc_rule);
+ vport->promisc_rule = NULL;
+ }
+}
+
+/* Sync vport rx mode from vport context */
+static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int promisc_all = 0;
+ int promisc_uc = 0;
+ int promisc_mc = 0;
+ int err;
+
+ err = mlx5_query_nic_vport_promisc(esw->dev,
+ vport->vport,
+ &promisc_uc,
+ &promisc_mc,
+ &promisc_all);
+ if (err)
+ return;
+ esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
+ vport->vport, promisc_all, promisc_mc);
+
+ if (!vport->info.trusted || !vport->enabled) {
+ promisc_uc = 0;
+ promisc_mc = 0;
+ promisc_all = 0;
+ }
+
+ esw_apply_vport_rx_mode(esw, vport, promisc_all,
+ (promisc_all || promisc_mc));
+}
+
+void esw_vport_change_handle_locked(struct mlx5_vport *vport)
+{
+ struct mlx5_core_dev *dev = vport->dev;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ u8 mac[ETH_ALEN];
+
+ mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac);
+ esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
+ vport->vport, mac);
+
+ if (vport->enabled_events & MLX5_VPORT_UC_ADDR_CHANGE) {
+ esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC);
+ esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC);
+ }
+
+ if (vport->enabled_events & MLX5_VPORT_MC_ADDR_CHANGE)
+ esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC);
+
+ if (vport->enabled_events & MLX5_VPORT_PROMISC_CHANGE) {
+ esw_update_vport_rx_mode(esw, vport);
+ if (!IS_ERR_OR_NULL(vport->allmulti_rule))
+ esw_update_vport_mc_promisc(esw, vport);
+ }
+
+ if (vport->enabled_events & (MLX5_VPORT_PROMISC_CHANGE | MLX5_VPORT_MC_ADDR_CHANGE))
+ esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC);
+
+ esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport);
+ if (vport->enabled)
+ arm_vport_context_events_cmd(dev, vport->vport,
+ vport->enabled_events);
+}
+
+static void esw_vport_change_handler(struct work_struct *work)
+{
+ struct mlx5_vport *vport =
+ container_of(work, struct mlx5_vport, vport_change_handler);
+ struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+
+ mutex_lock(&esw->state_lock);
+ esw_vport_change_handle_locked(vport);
+ mutex_unlock(&esw->state_lock);
+}
+
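+/* Build an EUI-64 style node GUID from the MAC address: the 0xff, 0xfe pair
+ * is inserted between the OUI bytes (mac[0..2]) and the NIC-specific bytes
+ * (mac[3..5]), and the result is stored byte-reversed.
+ */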
+static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac)
+{
+ ((u8 *)node_guid)[7] = mac[0];
+ ((u8 *)node_guid)[6] = mac[1];
+ ((u8 *)node_guid)[5] = mac[2];
+ ((u8 *)node_guid)[4] = 0xff;
+ ((u8 *)node_guid)[3] = 0xfe;
+ ((u8 *)node_guid)[2] = mac[3];
+ ((u8 *)node_guid)[1] = mac[4];
+ ((u8 *)node_guid)[0] = mac[5];
+}
+
+static int esw_vport_setup_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
+ return esw_legacy_vport_acl_setup(esw, vport);
+ else
+ return esw_vport_create_offloads_acl_tables(esw, vport);
+}
+
+static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
+ esw_legacy_vport_acl_cleanup(esw, vport);
+ else
+ esw_vport_destroy_offloads_acl_tables(esw, vport);
+}
+
+static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ u16 vport_num = vport->vport;
+ int flags;
+ int err;
+
+ err = esw_vport_setup_acl(esw, vport);
+ if (err)
+ return err;
+
+ if (mlx5_esw_is_manager_vport(esw, vport_num))
+ return 0;
+
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport_num, 1,
+ vport->info.link_state);
+
+ /* Host PF has its own mac/guid. */
+ if (vport_num) {
+ mlx5_modify_nic_vport_mac_address(esw->dev, vport_num,
+ vport->info.mac);
+ mlx5_modify_nic_vport_node_guid(esw->dev, vport_num,
+ vport->info.node_guid);
+ }
+
+ flags = (vport->info.vlan || vport->info.qos) ?
+ SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering)
+ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
+ vport->info.qos, flags);
+
+ return 0;
+}
+
+/* Don't cleanup vport->info, it's needed to restore vport configuration */
+static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ u16 vport_num = vport->vport;
+
+ if (!mlx5_esw_is_manager_vport(esw, vport_num))
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport_num, 1,
+ MLX5_VPORT_ADMIN_STATE_DOWN);
+
+ mlx5_esw_qos_vport_disable(esw, vport);
+ esw_vport_cleanup_acl(esw, vport);
+}
+
+int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ struct mlx5_vport *vport;
+ int ret;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ mutex_lock(&esw->state_lock);
+ WARN_ON(vport->enabled);
+
+ esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
+
+ ret = esw_vport_setup(esw, vport);
+ if (ret)
+ goto done;
+
+ /* Sync with current vport context */
+ vport->enabled_events = enabled_events;
+ vport->enabled = true;
+
+ /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well
+ * in smartNIC as it's a vport group manager.
+ */
+ if (mlx5_esw_is_manager_vport(esw, vport_num) ||
+ (!vport_num && mlx5_core_is_ecpf(esw->dev)))
+ vport->info.trusted = true;
+
+ if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
+ ret = mlx5_esw_vport_vhca_id_set(esw, vport_num);
+ if (ret)
+ goto err_vhca_mapping;
+ }
+
+ /* External controller host PF has factory programmed MAC.
+ * Read it from the device.
+ */
+ if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF)
+ mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac);
+
+ esw_vport_change_handle_locked(vport);
+
+ esw->enabled_vports++;
+ esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
+done:
+ mutex_unlock(&esw->state_lock);
+ return ret;
+
+err_vhca_mapping:
+ esw_vport_cleanup(esw, vport);
+ mutex_unlock(&esw->state_lock);
+ return ret;
+}
+
+void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->enabled)
+ goto done;
+
+ esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num);
+ /* Mark this vport as disabled to discard new events */
+ vport->enabled = false;
+
+ /* Disable events from this vport */
+ arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+
+ if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ mlx5_esw_vport_vhca_id_clear(esw, vport_num);
+
+ /* We don't assume VFs will clean up after themselves.
+ * Calling vport change handler while vport is disabled will cleanup
+ * the vport resources.
+ */
+ esw_vport_change_handle_locked(vport);
+ vport->enabled_events = 0;
+ esw_apply_vport_rx_mode(esw, vport, false, false);
+ esw_vport_cleanup(esw, vport);
+ esw->enabled_vports--;
+
+done:
+ mutex_unlock(&esw->state_lock);
+}
+
+static int eswitch_vport_event(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+ struct mlx5_eqe *eqe = data;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (!IS_ERR(vport))
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+ return NOTIFY_OK;
+}
+
+/**
+ * mlx5_esw_query_functions - Returns raw output about functions state
+ * @dev: Pointer to device to query
+ *
+ * mlx5_esw_query_functions() allocates and returns the raw query output
+ * from the device on success; otherwise it returns an ERR_PTR. The caller
+ * must free the memory using kvfree() when a valid pointer is returned.
+ */
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out);
+ u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(query_esw_functions_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ if (!err)
+ return out;
+
+ kvfree(out);
+ return ERR_PTR(err);
+}
+
+static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw)
+{
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) {
+ MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler,
+ ESW_FUNCTIONS_CHANGED);
+ mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
+ }
+}
+
+static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw)
+{
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev))
+ mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
+
+ flush_workqueue(esw->work_queue);
+}
+
+static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ memset(&vport->qos, 0, sizeof(vport->qos));
+ memset(&vport->info, 0, sizeof(vport->info));
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ }
+}
+
+/* Public E-Switch API */
+int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ int err;
+
+ err = mlx5_esw_vport_enable(esw, vport_num, enabled_events);
+ if (err)
+ return err;
+
+ mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
+ err = esw_offloads_load_rep(esw, vport_num);
+ if (err)
+ goto err_rep;
+
+ return err;
+
+err_rep:
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_vport_disable(esw, vport_num);
+ return err;
+}
+
+void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ esw_offloads_unload_rep(esw, vport_num);
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_vport_disable(esw, vport_num);
+}
+
+void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ if (!vport->enabled)
+ continue;
+ mlx5_eswitch_unload_vport(esw, vport->vport);
+ }
+}
+
+int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
+ if (err)
+ goto vf_err;
+ }
+
+ return 0;
+
+vf_err:
+ mlx5_eswitch_unload_vf_vports(esw, num_vfs);
+ return err;
+}
+
+static int host_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_ecpf(dev))
+ return 0;
+
+ /* Once the vport and representor are ready, take the external host PF
+ * out of its initializing state. Enabling the HCA clears the
+ * iser->initializing bit and host PF driver loading can progress.
+ */
+ return mlx5_cmd_host_pf_enable_hca(dev);
+}
+
+static void host_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_ecpf(dev))
+ return;
+
+ mlx5_cmd_host_pf_disable_hca(dev);
+}
+
+/* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs
+ * whichever are present on the eswitch.
+ */
+int
+mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+ enum mlx5_eswitch_vport_event enabled_events)
+{
+ int ret;
+
+ /* Enable PF vport */
+ ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events);
+ if (ret)
+ return ret;
+
+ /* Enable external host PF HCA */
+ ret = host_pf_enable_hca(esw->dev);
+ if (ret)
+ goto pf_hca_err;
+
+ /* Enable ECPF vport */
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events);
+ if (ret)
+ goto ecpf_err;
+ }
+
+ /* Enable VF vports */
+ ret = mlx5_eswitch_load_vf_vports(esw, esw->esw_funcs.num_vfs,
+ enabled_events);
+ if (ret)
+ goto vf_err;
+ return 0;
+
+vf_err:
+ if (mlx5_ecpf_vport_exists(esw->dev))
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
+ecpf_err:
+ host_pf_disable_hca(esw->dev);
+pf_hca_err:
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
+ return ret;
+}
+
+/* mlx5_eswitch_disable_pf_vf_vports() disables vports of PF, ECPF and VFs
+ * whichever are previously enabled on the eswitch.
+ */
+void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
+{
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+
+ if (mlx5_ecpf_vport_exists(esw->dev))
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
+
+ host_pf_disable_hca(esw->dev);
+ mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
+}
+
+static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
+{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+ &val);
+ if (!err) {
+ esw->params.large_group_num = val.vu32;
+ } else {
+ esw_warn(esw->dev,
+ "Devlink can't get param fdb_large_groups, uses default (%d).\n",
+ ESW_OFFLOADS_DEFAULT_NUM_GROUPS);
+ esw->params.large_group_num = ESW_OFFLOADS_DEFAULT_NUM_GROUPS;
+ }
+}
+
+static void
+mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs)
+{
+ const u32 *out;
+
+ if (num_vfs < 0)
+ return;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ esw->esw_funcs.num_vfs = num_vfs;
+ return;
+ }
+
+ out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(out))
+ return;
+
+ esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
+ kvfree(out);
+}
+
+static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode)
+{
+ struct mlx5_esw_event_info info = {};
+
+ info.new_mode = mode;
+
+ blocking_notifier_call_chain(&esw->n_head, 0, &info);
+}
+
+static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ int total_vports;
+ int err;
+
+ if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED)
+ return 0;
+
+ total_vports = mlx5_eswitch_get_total_vports(dev);
+
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
+ err = mlx5_fs_egress_acls_init(dev, total_vports);
+ if (err)
+ return err;
+ } else {
+ esw_warn(dev, "egress ACL is not supported by FW\n");
+ }
+
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
+ err = mlx5_fs_ingress_acls_init(dev, total_vports);
+ if (err)
+ goto err;
+ } else {
+ esw_warn(dev, "ingress ACL is not supported by FW\n");
+ }
+ esw->flags |= MLX5_ESWITCH_VPORT_ACL_NS_CREATED;
+ return 0;
+
+err:
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+ mlx5_fs_egress_acls_cleanup(dev);
+ return err;
+}
+
+static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+
+ esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED;
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
+ mlx5_fs_ingress_acls_cleanup(dev);
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+ mlx5_fs_egress_acls_cleanup(dev);
+}
+
+/**
+ * mlx5_eswitch_enable_locked - Enable eswitch
+ * @esw: Pointer to eswitch
+ * @num_vfs: Enable eswitch for given number of VFs. This is optional.
+ * Valid values are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS.
+ * Caller should pass num_vfs > 0 when enabling eswitch for
+ * vf vports. Caller should pass num_vfs = 0, when eswitch
+ * is enabled without sriov VFs or when caller
+ * is unaware of the sriov state of the host PF on ECPF based
+ * eswitch. Caller should pass < 0 when num_vfs should be
+ * completely ignored. This is typically the case when eswitch
+ * is enabled without sriov regardless of PF/ECPF system.
+ * mlx5_eswitch_enable_locked() enables the eswitch in either legacy or offloads
+ * mode. If num_vfs >= 0 is provided, it sets up VF related eswitch vports.
+ * It returns 0 on success or error code on failure.
+ */
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
+{
+ int err;
+
+ lockdep_assert_held(&esw->mode_lock);
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+ esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
+ return -EOPNOTSUPP;
+ }
+
+ mlx5_eswitch_get_devlink_param(esw);
+
+ err = mlx5_esw_acls_ns_init(esw);
+ if (err)
+ return err;
+
+ mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
+
+ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->nb);
+
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
+ err = esw_legacy_enable(esw);
+ } else {
+ mlx5_rescan_drivers(esw->dev);
+ err = esw_offloads_enable(esw);
+ }
+
+ if (err)
+ goto abort;
+
+ esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
+
+ mlx5_eswitch_event_handler_register(esw);
+
+ esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
+
+ mlx5_esw_mode_change_notify(esw, esw->mode);
+
+ return 0;
+
+abort:
+ mlx5_esw_acls_ns_cleanup(esw);
+ return err;
+}
+
+/**
+ * mlx5_eswitch_enable - Enable eswitch
+ * @esw: Pointer to eswitch
+ * @num_vfs: Enable eswitch for given number of VFs.
+ * Caller must pass num_vfs > 0 when enabling eswitch for
+ * vf vports.
+ * mlx5_eswitch_enable() returns 0 on success or error code on failure.
+ */
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
+{
+ bool toggle_lag;
+ int ret;
+
+ if (!mlx5_esw_allowed(esw))
+ return 0;
+
+ devl_assert_locked(priv_to_devlink(esw->dev));
+
+ toggle_lag = !mlx5_esw_is_fdb_created(esw);
+
+ if (toggle_lag)
+ mlx5_lag_disable_change(esw->dev);
+
+ down_write(&esw->mode_lock);
+ if (!mlx5_esw_is_fdb_created(esw)) {
+ ret = mlx5_eswitch_enable_locked(esw, num_vfs);
+ } else {
+ enum mlx5_eswitch_vport_event vport_events;
+
+ vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ?
+ MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE;
+ ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events);
+ if (!ret)
+ esw->esw_funcs.num_vfs = num_vfs;
+ }
+ up_write(&esw->mode_lock);
+
+ if (toggle_lag)
+ mlx5_lag_enable_change(esw->dev);
+
+ return ret;
+}
+
+/* When disabling sriov, free driver level resources. */
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
+{
+ if (!mlx5_esw_allowed(esw))
+ return;
+
+ devl_assert_locked(priv_to_devlink(esw->dev));
+ down_write(&esw->mode_lock);
+ /* If driver is unloaded, this function is called twice by remove_one()
+ * and mlx5_unload(). Prevent the second call.
+ */
+ if (!esw->esw_funcs.num_vfs && !clear_vf)
+ goto unlock;
+
+ esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
+
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+ if (clear_vf)
+ mlx5_eswitch_clear_vf_vports_info(esw);
+ /* If disabling sriov in switchdev mode, free meta rules here
+ * because it depends on num_vfs.
+ */
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS) {
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
+ devl_rate_nodes_destroy(devlink);
+ }
+ /* Destroy legacy fdb when disabling sriov in legacy mode. */
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
+ mlx5_eswitch_disable_locked(esw);
+
+ esw->esw_funcs.num_vfs = 0;
+
+unlock:
+ up_write(&esw->mode_lock);
+}
+
+/* Free resources for corresponding eswitch mode. It is called by devlink
+ * when changing eswitch mode or modprobe when unloading driver.
+ */
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
+{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
+	/* Notify eswitch users that it is exiting from the current mode,
+	 * so that they can do the necessary cleanup before the eswitch is
+	 * disabled.
+	 */
+ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY);
+
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+ mlx5_eswitch_event_handler_unregister(esw);
+
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
+
+ if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) {
+ esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED;
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ esw_offloads_disable(esw);
+ else if (esw->mode == MLX5_ESWITCH_LEGACY)
+ esw_legacy_disable(esw);
+ mlx5_esw_acls_ns_cleanup(esw);
+ }
+
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ devl_rate_nodes_destroy(devlink);
+}
+
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
+{
+ if (!mlx5_esw_allowed(esw))
+ return;
+
+ devl_assert_locked(priv_to_devlink(esw->dev));
+ mlx5_lag_disable_change(esw->dev);
+ down_write(&esw->mode_lock);
+ mlx5_eswitch_disable_locked(esw);
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
+}
+
+static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
+{
+ u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
+
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ if (!mlx5_core_is_ecpf(dev)) {
+ *max_sfs = 0;
+ return 0;
+ }
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_cap_host_pf(dev, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf);
+ *sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
+static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev,
+ int index, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+ int err;
+
+ vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+ if (!vport)
+ return -ENOMEM;
+
+ vport->dev = esw->dev;
+ vport->vport = vport_num;
+ vport->index = index;
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler);
+ err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL);
+ if (err)
+ goto insert_err;
+
+ esw->total_vports++;
+ return 0;
+
+insert_err:
+ kfree(vport);
+ return err;
+}
+
+static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ xa_erase(&esw->vports, vport->vport);
+ kfree(vport);
+}
+
+static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vport(esw, i, vport)
+ mlx5_esw_vport_free(esw, vport);
+ xa_destroy(&esw->vports);
+}
+
+static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ u16 max_host_pf_sfs;
+ u16 base_sf_num;
+ int idx = 0;
+ int err;
+ int i;
+
+ xa_init(&esw->vports);
+
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF);
+ if (err)
+ goto err;
+ if (esw->first_host_vport == MLX5_VPORT_PF)
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+
+ for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, idx);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+ }
+ base_sf_num = mlx5_sf_start_function_id(dev);
+ for (i = 0; i < mlx5_sf_max_functions(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num);
+ if (err)
+ goto err;
+ for (i = 0; i < max_host_pf_sfs; i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ if (mlx5_ecpf_vport_exists(dev)) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF);
+ if (err)
+ goto err;
+ idx++;
+ }
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK);
+ if (err)
+ goto err;
+ return 0;
+
+err:
+ mlx5_esw_vports_cleanup(esw);
+ return err;
+}
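
For illustration (an assumed configuration, not taken from this patch): on a non-ECPF device with two VFs and no SFs, the vports xarray is keyed by vport number and ends up holding vport 0 (the PF, marked HOST_FN), vports 1-2 (the VFs, marked VF and HOST_FN), and the uplink vport 0xffff, so mlx5_eswitch_get_vport() and the marked iterators in eswitch.h operate directly on vport numbers.
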
+
+int mlx5_eswitch_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+ int err;
+
+ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ esw = kzalloc(sizeof(*esw), GFP_KERNEL);
+ if (!esw)
+ return -ENOMEM;
+
+ esw->dev = dev;
+ esw->manager_vport = mlx5_eswitch_manager_vport(dev);
+ esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev);
+
+ esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
+ if (!esw->work_queue) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ err = mlx5_esw_vports_init(esw);
+ if (err)
+ goto abort;
+
+ err = esw_offloads_init_reps(esw);
+ if (err)
+ goto reps_err;
+
+ mutex_init(&esw->offloads.encap_tbl_lock);
+ hash_init(esw->offloads.encap_tbl);
+ mutex_init(&esw->offloads.decap_tbl_lock);
+ hash_init(esw->offloads.decap_tbl);
+ mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr);
+ atomic64_set(&esw->offloads.num_flows, 0);
+ ida_init(&esw->offloads.vport_metadata_ida);
+ xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
+ mutex_init(&esw->state_lock);
+ init_rwsem(&esw->mode_lock);
+ refcount_set(&esw->qos.refcnt, 0);
+
+ esw->enabled_vports = 0;
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+ else
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (MLX5_ESWITCH_MANAGER(dev) &&
+ mlx5_esw_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+ dev->priv.eswitch = esw;
+ BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+
+ esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
+ esw_info(dev,
+ "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+ esw->total_vports,
+ MLX5_MAX_UC_PER_VPORT(dev),
+ MLX5_MAX_MC_PER_VPORT(dev));
+ return 0;
+
+reps_err:
+ mlx5_esw_vports_cleanup(esw);
+abort:
+ if (esw->work_queue)
+ destroy_workqueue(esw->work_queue);
+ kfree(esw);
+ return err;
+}
+
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+{
+ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
+ return;
+
+ esw_info(esw->dev, "cleanup\n");
+
+ debugfs_remove_recursive(esw->dbgfs);
+ esw->dev->priv.eswitch = NULL;
+ destroy_workqueue(esw->work_queue);
+ WARN_ON(refcount_read(&esw->qos.refcnt));
+ mutex_destroy(&esw->state_lock);
+ WARN_ON(!xa_empty(&esw->offloads.vhca_map));
+ xa_destroy(&esw->offloads.vhca_map);
+ ida_destroy(&esw->offloads.vport_metadata_ida);
+ mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr);
+ mutex_destroy(&esw->offloads.encap_tbl_lock);
+ mutex_destroy(&esw->offloads.decap_tbl_lock);
+ esw_offloads_cleanup_reps(esw);
+ mlx5_esw_vports_cleanup(esw);
+ kfree(esw);
+}
+
+/* Vport Administration */
+static int
+mlx5_esw_set_vport_mac_locked(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport, const u8 *mac)
+{
+ u16 vport_num = evport->vport;
+ u64 node_guid;
+ int err = 0;
+
+ if (is_multicast_ether_addr(mac))
+ return -EINVAL;
+
+ if (evport->info.spoofchk && !is_valid_ether_addr(mac))
+ mlx5_core_warn(esw->dev,
+ "Set invalid MAC while spoofchk is on, vport(%d)\n",
+ vport_num);
+
+ err = mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, mac);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ node_guid_gen_from_mac(&node_guid, mac);
+ err = mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, node_guid);
+ if (err)
+ mlx5_core_warn(esw->dev,
+ "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
+ vport_num, err);
+
+ ether_addr_copy(evport->info.mac, mac);
+ evport->info.node_guid = node_guid;
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
+
+ return err;
+}
+
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ u16 vport, const u8 *mac)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err = 0;
+
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_set_vport_mac_locked(esw, evport, mac);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return false;
+
+ return xa_get_mark(&esw->vports, vport_num, mark);
+}
+
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF);
+}
+
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF);
+}
+
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ u16 vport, int link_state)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int opmod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
+ int other_vport = 1;
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ if (vport == MLX5_VPORT_UPLINK) {
+ opmod = MLX5_VPORT_STATE_OP_MOD_UPLINK;
+ other_vport = 0;
+ vport = 0;
+ }
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state);
+ if (err) {
+ mlx5_core_warn(esw->dev, "Failed to set vport %d link state, opmod = %d, err = %d",
+ vport, opmod, err);
+ goto unlock;
+ }
+
+ evport->info.link_state = link_state;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ u16 vport, struct ifla_vf_info *ivi)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ memset(ivi, 0, sizeof(*ivi));
+ ivi->vf = vport - 1;
+
+ mutex_lock(&esw->state_lock);
+ ether_addr_copy(ivi->mac, evport->info.mac);
+ ivi->linkstate = evport->info.link_state;
+ ivi->vlan = evport->info.vlan;
+ ivi->qos = evport->info.qos;
+ ivi->spoofchk = evport->info.spoofchk;
+ ivi->trusted = evport->info.trusted;
+ if (evport->qos.enabled) {
+ ivi->min_tx_rate = evport->qos.min_rate;
+ ivi->max_tx_rate = evport->qos.max_rate;
+ }
+ mutex_unlock(&esw->state_lock);
+
+ return 0;
+}
+
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ u16 vport, u16 vlan, u8 qos, u8 set_flags)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ int err = 0;
+
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+ if (vlan > 4095 || qos > 7)
+ return -EINVAL;
+
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) {
+ err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+ if (err)
+ return err;
+ }
+
+ evport->info.vlan = vlan;
+ evport->info.qos = qos;
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
+ if (err)
+ return err;
+ err = esw_acl_egress_lgcy_setup(esw, evport);
+ }
+
+ return err;
+}
+
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ u16 vport_num,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {};
+ struct mlx5_vport_drop_stats stats = {};
+ int err = 0;
+ u32 *out;
+
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport);
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec_inout(esw->dev, query_vport_counter, in, out);
+ if (err)
+ goto free_out;
+
+ #define MLX5_GET_CTR(p, x) \
+ MLX5_GET64(query_vport_counter_out, p, x)
+
+ memset(vf_stats, 0, sizeof(*vf_stats));
+ vf_stats->rx_packets =
+ MLX5_GET_CTR(out, received_eth_unicast.packets) +
+ MLX5_GET_CTR(out, received_ib_unicast.packets) +
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ vf_stats->rx_bytes =
+ MLX5_GET_CTR(out, received_eth_unicast.octets) +
+ MLX5_GET_CTR(out, received_ib_unicast.octets) +
+ MLX5_GET_CTR(out, received_eth_multicast.octets) +
+ MLX5_GET_CTR(out, received_ib_multicast.octets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.octets);
+
+ vf_stats->tx_packets =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+
+ vf_stats->tx_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+ vf_stats->multicast =
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets);
+
+ vf_stats->broadcast =
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ err = mlx5_esw_query_vport_drop_stats(esw->dev, vport, &stats);
+ if (err)
+ goto free_out;
+ vf_stats->rx_dropped = stats.rx_dropped;
+ vf_stats->tx_dropped = stats.tx_dropped;
+
+free_out:
+ kvfree(out);
+ return err;
+}
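
As an illustrative sketch (not part of this patch; the wrapper name example_get_vf_stats is hypothetical), a netdev .ndo_get_vf_stats handler would map VF index vf to eswitch vport vf + 1, since vport 0 is the PF:

static int example_get_vf_stats(struct mlx5_eswitch *esw, int vf,
				struct ifla_vf_stats *vf_stats)
{
	/* VF i is eswitch vport i + 1; vport 0 is the PF. */
	return mlx5_eswitch_get_vport_stats(esw, vf + 1, vf_stats);
}
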
+
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+ return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap :
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+
+bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS);
+}
+
+int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&esw->n_head, nb);
+}
+
+void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&esw->n_head, nb);
+}
+
+/**
+ * mlx5_esw_hold() - Try to take a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ *
+ * Should be called by eswitch resource callers.
+ *
+ * Return: true if the lock was taken (or no eswitch is present), false otherwise.
+ */
+bool mlx5_esw_hold(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	/* e.g. a VF doesn't have an eswitch, so there is nothing to do */
+ if (!mlx5_esw_allowed(esw))
+ return true;
+
+ if (down_read_trylock(&esw->mode_lock) != 0)
+ return true;
+
+ return false;
+}
+
+/**
+ * mlx5_esw_release() - Release a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_release(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ if (mlx5_esw_allowed(esw))
+ up_read(&esw->mode_lock);
+}
+
+/**
+ * mlx5_esw_get() - Increase esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_get(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ if (mlx5_esw_allowed(esw))
+ atomic64_inc(&esw->user_count);
+}
+
+/**
+ * mlx5_esw_put() - Decrease esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_put(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ if (mlx5_esw_allowed(esw))
+ atomic64_dec_if_positive(&esw->user_count);
+}
+
+/**
+ * mlx5_esw_try_lock() - Take a write lock on esw mode lock.
+ * @esw: eswitch device.
+ *
+ * Should be called by the esw mode change routine.
+ *
+ * Return:
+ * * >= 0 - the current esw mode, if successfully locked and the user refcount is 0.
+ * * -EBUSY - the user refcount is not 0.
+ * * -EINVAL - a mode change is already in progress or the lock is already held.
+ */
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
+{
+ if (down_write_trylock(&esw->mode_lock) == 0)
+ return -EINVAL;
+
+ if (atomic64_read(&esw->user_count) > 0) {
+ up_write(&esw->mode_lock);
+ return -EBUSY;
+ }
+
+ return esw->mode;
+}
+
+/**
+ * mlx5_esw_unlock() - Release write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_unlock(struct mlx5_eswitch *esw)
+{
+ up_write(&esw->mode_lock);
+}
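
A minimal usage sketch of the hold/get/release/put protocol (illustrative only; the function name example_use_esw_resource is hypothetical): a resource caller briefly takes the mode lock to make sure no mode change is in flight, bumps the user count so that mlx5_esw_try_lock() returns -EBUSY while the resource is in use, and drops the read lock before doing its long-running work:

static int example_use_esw_resource(struct mlx5_core_dev *dev)
{
	int err;

	if (!mlx5_esw_hold(dev))
		return -EBUSY;
	mlx5_esw_get(dev);
	mlx5_esw_release(dev);

	/* ... create and use eswitch-backed resources here ... */
	err = 0;

	mlx5_esw_put(dev);
	return err;
}
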
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev: Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports returns the total number of eswitch vports.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw: eswitch device.
+ *
+ * Return: the Mellanox core device which manages the eswitch, or NULL if
+ * the device is not an eswitch manager.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+ return mlx5_esw_allowed(esw) ? esw->dev : NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
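
For context, a hedged sketch (not part of this patch; the helper name example_in_switchdev_mode is hypothetical) of how an external consumer, e.g. a representor or RDMA driver, can use the exported helpers above without reaching into eswitch internals:

static bool example_in_switchdev_mode(struct mlx5_core_dev *dev)
{
	/* Both helpers tolerate a missing eswitch: the mode defaults to
	 * LEGACY and the core dev lookup returns NULL.
	 */
	return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS &&
	       mlx5_eswitch_get_core_dev(dev->priv.eswitch) != NULL;
}
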
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
new file mode 100644
index 000000000..a3daca44f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -0,0 +1,804 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_ESWITCH_H__
+#define __MLX5_ESWITCH_H__
+
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/atomic.h>
+#include <linux/xarray.h>
+#include <net/devlink.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "lib/mpfs.h"
+#include "lib/fs_chains.h"
+#include "sf/sf.h"
+#include "en/tc_ct.h"
+#include "en/tc/sample.h"
+
+enum mlx5_mapped_obj_type {
+ MLX5_MAPPED_OBJ_CHAIN,
+ MLX5_MAPPED_OBJ_SAMPLE,
+ MLX5_MAPPED_OBJ_INT_PORT_METADATA,
+};
+
+struct mlx5_mapped_obj {
+ enum mlx5_mapped_obj_type type;
+ union {
+ u32 chain;
+ struct {
+ u32 group_id;
+ u32 rate;
+ u32 trunc_size;
+ u32 tunnel_id;
+ } sample;
+ u32 int_port_metadata;
+ };
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+#define ESW_OFFLOADS_DEFAULT_NUM_GROUPS 15
+
+#define MLX5_MAX_UC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
+
+#define MLX5_MAX_MC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
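
For example (illustrative arithmetic, not taken from this patch): a device reporting log_max_current_uc_list = 7 allows 1 << 7 = 128 unicast L2 addresses per vport, and log_max_current_mc_list works the same way for the multicast list.
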
+
+#define mlx5_esw_has_fwd_fdb(dev) \
+ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
+
+#define esw_chains(esw) \
+ ((esw)->fdb_table.offloads.esw_chains_priv)
+
+enum {
+ MAPPING_TYPE_CHAIN,
+ MAPPING_TYPE_TUNNEL,
+ MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ MAPPING_TYPE_LABELS,
+ MAPPING_TYPE_ZONE,
+ MAPPING_TYPE_INT_PORT,
+};
+
+struct vport_ingress {
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_handle *allow_rule;
+ struct {
+ struct mlx5_flow_group *allow_spoofchk_only_grp;
+ struct mlx5_flow_group *allow_untagged_spoofchk_grp;
+ struct mlx5_flow_group *allow_untagged_only_grp;
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+ } legacy;
+ struct {
+ /* Optional group to add an FTE to do internal priority
+ * tagging on ingress packets.
+ */
+ struct mlx5_flow_group *metadata_prio_tag_grp;
+ /* Group to add default match-all FTE entry to tag ingress
+ * packet with metadata.
+ */
+ struct mlx5_flow_group *metadata_allmatch_grp;
+ /* Optional group to add a drop all rule */
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_modify_hdr *modify_metadata;
+ struct mlx5_flow_handle *modify_metadata_rule;
+ struct mlx5_flow_handle *drop_rule;
+ } offloads;
+};
+
+struct vport_egress {
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_handle *allowed_vlan;
+ struct mlx5_flow_group *vlan_grp;
+ union {
+ struct {
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+ } legacy;
+ struct {
+ struct mlx5_flow_group *fwd_grp;
+ struct mlx5_flow_handle *fwd_rule;
+ struct mlx5_flow_handle *bounce_rule;
+ struct mlx5_flow_group *bounce_grp;
+ } offloads;
+ };
+};
+
+struct mlx5_vport_drop_stats {
+ u64 rx_dropped;
+ u64 tx_dropped;
+};
+
+struct mlx5_vport_info {
+ u8 mac[ETH_ALEN];
+ u16 vlan;
+ u64 node_guid;
+ int link_state;
+ u8 qos;
+ u8 spoofchk: 1;
+ u8 trusted: 1;
+};
+
+/* Vport context events */
+enum mlx5_eswitch_vport_event {
+ MLX5_VPORT_UC_ADDR_CHANGE = BIT(0),
+ MLX5_VPORT_MC_ADDR_CHANGE = BIT(1),
+ MLX5_VPORT_PROMISC_CHANGE = BIT(3),
+};
+
+struct mlx5_vport {
+ struct mlx5_core_dev *dev;
+ struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct mlx5_flow_handle *promisc_rule;
+ struct mlx5_flow_handle *allmulti_rule;
+ struct work_struct vport_change_handler;
+
+ struct vport_ingress ingress;
+ struct vport_egress egress;
+ u32 default_metadata;
+ u32 metadata;
+
+ struct mlx5_vport_info info;
+
+ struct {
+ bool enabled;
+ u32 esw_tsar_ix;
+ u32 bw_share;
+ u32 min_rate;
+ u32 max_rate;
+ struct mlx5_esw_rate_group *group;
+ } qos;
+
+ u16 vport;
+ bool enabled;
+ enum mlx5_eswitch_vport_event enabled_events;
+ int index;
+ struct devlink_port *dl_port;
+ struct dentry *dbgfs;
+};
+
+struct mlx5_esw_indir_table;
+
+struct mlx5_eswitch_fdb {
+ union {
+ struct legacy_fdb {
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *addr_grp;
+ struct mlx5_flow_group *allmulti_grp;
+ struct mlx5_flow_group *promisc_grp;
+ struct mlx5_flow_table *vepa_fdb;
+ struct mlx5_flow_handle *vepa_uplink_rule;
+ struct mlx5_flow_handle *vepa_star_rule;
+ } legacy;
+
+ struct offloads_fdb {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *tc_miss_table;
+ struct mlx5_flow_table *slow_fdb;
+ struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *send_to_vport_meta_grp;
+ struct mlx5_flow_group *peer_miss_grp;
+ struct mlx5_flow_handle **peer_miss_rules;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle **send_to_vport_meta_rules;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
+ int vlan_push_pop_refcount;
+
+ struct mlx5_fs_chains *esw_chains_priv;
+ struct {
+ DECLARE_HASHTABLE(table, 8);
+ /* Protects vports.table */
+ struct mutex lock;
+ } vports;
+
+ struct mlx5_esw_indir_table *indir;
+
+ } offloads;
+ };
+ u32 flags;
+};
+
+struct mlx5_esw_offload {
+ struct mlx5_flow_table *ft_offloads_restore;
+ struct mlx5_flow_group *restore_group;
+ struct mlx5_modify_hdr *restore_copy_hdr_id;
+ struct mapping_ctx *reg_c0_obj_pool;
+
+ struct mlx5_flow_table *ft_offloads;
+ struct mlx5_flow_group *vport_rx_group;
+ struct mlx5_flow_group *vport_rx_drop_group;
+ struct mlx5_flow_handle *vport_rx_drop_rule;
+ struct xarray vport_reps;
+ struct list_head peer_flows;
+ struct mutex peer_mutex;
+ struct mutex encap_tbl_lock; /* protects encap_tbl */
+ DECLARE_HASHTABLE(encap_tbl, 8);
+ struct mutex decap_tbl_lock; /* protects decap_tbl */
+ DECLARE_HASHTABLE(decap_tbl, 8);
+ struct mod_hdr_tbl mod_hdr;
+ DECLARE_HASHTABLE(termtbl_tbl, 8);
+ struct mutex termtbl_mutex; /* protects termtbl hash */
+ struct xarray vhca_map;
+ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
+ u8 inline_mode;
+ atomic64_t num_flows;
+ enum devlink_eswitch_encap_mode encap;
+ struct ida vport_metadata_ida;
+ unsigned int host_number; /* ECPF supports one external host */
+};
+
+/* E-Switch MC FDB table hash node */
+struct esw_mc_addr { /* SRIOV only */
+ struct l2addr_node node;
+ struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */
+ u32 refcnt;
+};
+
+struct mlx5_host_work {
+ struct work_struct work;
+ struct mlx5_eswitch *esw;
+};
+
+struct mlx5_esw_functions {
+ struct mlx5_nb nb;
+ u16 num_vfs;
+};
+
+enum {
+ MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+ MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1),
+ MLX5_ESWITCH_VPORT_ACL_NS_CREATED = BIT(2),
+};
+
+struct mlx5_esw_bridge_offloads;
+
+enum {
+ MLX5_ESW_FDB_CREATED = BIT(0),
+};
+
+struct mlx5_eswitch {
+ struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
+ struct mlx5_eswitch_fdb fdb_table;
+ /* legacy data structures */
+ struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
+ struct esw_mc_addr mc_promisc;
+ /* end of legacy */
+ struct workqueue_struct *work_queue;
+ struct xarray vports;
+ u32 flags;
+ int total_vports;
+ int enabled_vports;
+ /* Synchronize between vport change events
+ * and async SRIOV admin state changes
+ */
+ struct mutex state_lock;
+
+	/* Protects eswitch mode changes that occur via one or more
+	 * user commands, e.g. sriov state change, devlink commands.
+	 */
+ struct rw_semaphore mode_lock;
+ atomic64_t user_count;
+
+ struct {
+ u32 root_tsar_ix;
+ struct mlx5_esw_rate_group *group0;
+ struct list_head groups; /* Protected by esw->state_lock */
+
+ /* Protected by esw->state_lock.
+ * Initially 0, meaning no QoS users and QoS is disabled.
+ */
+ refcount_t refcnt;
+ } qos;
+
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ struct mlx5_esw_offload offloads;
+ int mode;
+ u16 manager_vport;
+ u16 first_host_vport;
+ struct mlx5_esw_functions esw_funcs;
+ struct {
+ u32 large_group_num;
+ } params;
+ struct blocking_notifier_head n_head;
+ struct dentry *dbgfs;
+ bool paired[MLX5_MAX_PORTS];
+};
+
+void esw_offloads_disable(struct mlx5_eswitch *esw);
+int esw_offloads_enable(struct mlx5_eswitch *esw);
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
+int esw_offloads_init_reps(struct mlx5_eswitch *esw);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule);
+
+bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
+int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
+
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
+
+/* E-Switch API */
+int mlx5_eswitch_init(struct mlx5_core_dev *dev);
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+
+#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1)
+int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs);
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw);
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ u16 vport, const u8 *mac);
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ u16 vport, int link_state);
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ u16 vport, u16 vlan, u8 qos);
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+ u16 vport, bool spoofchk);
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+ u16 vport_num, bool setting);
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
+ u32 max_rate, u32 min_rate);
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack);
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ u16 vport, struct ifla_vf_info *ivi);
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ u16 vport,
+ struct ifla_vf_stats *vf_stats);
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
+
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
+ bool other_vport, void *in);
+
+struct mlx5_flow_spec;
+struct mlx5_esw_flow_attr;
+struct mlx5_termtbl_handle;
+
+bool
+mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int num_dest);
+
+void
+mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
+ struct mlx5_termtbl_handle *tt);
+
+void
+mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
+ struct mlx5_flow_destination *dest);
+
+enum {
+ SET_VLAN_STRIP = BIT(0),
+ SET_VLAN_INSERT = BIT(1)
+};
+
+enum mlx5_flow_match_level {
+ MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE,
+ MLX5_MATCH_L2 = MLX5_INLINE_MODE_L2,
+ MLX5_MATCH_L3 = MLX5_INLINE_MODE_IP,
+ MLX5_MATCH_L4 = MLX5_INLINE_MODE_TCP_UDP,
+};
+
+/* current maximum for flow based vport multicasting */
+#define MLX5_MAX_FLOW_FWD_VPORTS 32
+
+enum {
+ MLX5_ESW_DEST_ENCAP = BIT(0),
+ MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2),
+};
+
+struct mlx5_esw_flow_attr {
+ struct mlx5_eswitch_rep *in_rep;
+ struct mlx5_core_dev *in_mdev;
+ struct mlx5_core_dev *counter_dev;
+ struct mlx5e_tc_int_port *dest_int_port;
+ struct mlx5e_tc_int_port *int_port;
+
+ int split_count;
+ int out_count;
+
+ __be16 vlan_proto[MLX5_FS_VLAN_DEPTH];
+ u16 vlan_vid[MLX5_FS_VLAN_DEPTH];
+ u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
+ u8 total_vlan;
+ struct {
+ u32 flags;
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ struct mlx5_core_dev *mdev;
+ struct mlx5_termtbl_handle *termtbl;
+ int src_port_rewrite_act_id;
+ } dests[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5_rx_tun_attr *rx_tun_attr;
+ struct ethhdr eth;
+ struct mlx5_pkt_reformat *decap_pkt_reformat;
+};
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap);
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
+ u8 *hw_addr, int *hw_addr_len,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const u8 *hw_addr, int hw_addr_len,
+ struct netlink_ext_ack *extack);
+
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr);
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr);
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ u16 vport, u16 vlan, u8 qos, u8 set_flags);
+
+static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw)
+{
+ return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) &&
+ MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan));
+}
+
+static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
+ u8 vlan_depth)
+{
+ bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
+
+ if (vlan_depth == 1)
+ return ret;
+
+ return ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
+}
+
+bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1);
+
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
+
+#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
+
+#define esw_info(__dev, format, ...) \
+ dev_info((__dev)->device, "E-Switch: " format, ##__VA_ARGS__)
+
+#define esw_warn(__dev, format, ...) \
+ dev_warn((__dev)->device, "E-Switch: " format, ##__VA_ARGS__)
+
+#define esw_debug(dev, format, ...) \
+ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+
+static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw)
+{
+ return esw && MLX5_ESWITCH_MANAGER(esw->dev);
+}
+
+/* The returned number is valid only when the dev is eswitch manager. */
+static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_ecpf_esw_manager(dev) ?
+ MLX5_VPORT_ECPF : MLX5_VPORT_PF;
+}
+
+static inline bool
+mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return esw->manager_vport == vport_num;
+}
+
+static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_ecpf_esw_manager(dev) ?
+ MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
+}
+
+static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_ecpf_esw_manager(dev);
+}
+
+static inline unsigned int
+mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
+ u16 vport_num)
+{
+ return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num;
+}
+
+static inline u16
+mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index)
+{
+ return dl_port_index & 0xffff;
+}
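
Worked example (illustrative): with MLX5_CAP_GEN(dev, vhca_id) = 5 and vport_num = 3, the devlink port index is (5 << 16) | 3 = 0x00050003, and masking with 0xffff recovers vport 3.
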
+
+static inline bool mlx5_esw_is_fdb_created(struct mlx5_eswitch *esw)
+{
+ return esw->fdb_table.flags & MLX5_ESW_FDB_CREATED;
+}
+
+/* TODO: This mlx5e_tc function shouldn't be called by eswitch */
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
+/* Each mark identifies an eswitch vport type.
+ * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using
+ * a single mark.
+ * MLX5_ESW_VPT_VF identifies an SRIOV VF vport.
+ * MLX5_ESW_VPT_SF identifies an SF vport.
+ */
+#define MLX5_ESW_VPT_HOST_FN XA_MARK_0
+#define MLX5_ESW_VPT_VF XA_MARK_1
+#define MLX5_ESW_VPT_SF XA_MARK_2
+
+/* The vport iterator is valid only after vports are initialized in mlx5_eswitch_init.
+ * Borrowed the idea from xa_for_each_marked(), but with support for a desired last element.
+ */
+
+#define mlx5_esw_for_each_vport(esw, index, vport) \
+ xa_for_each(&((esw)->vports), index, vport)
+
+#define mlx5_esw_for_each_entry_marked(xa, index, entry, last, filter) \
+ for (index = 0, entry = xa_find(xa, &index, last, filter); \
+ entry; entry = xa_find_after(xa, &index, last, filter))
+
+#define mlx5_esw_for_each_vport_marked(esw, index, vport, last, filter) \
+ mlx5_esw_for_each_entry_marked(&((esw)->vports), index, vport, last, filter)
+
+#define mlx5_esw_for_each_vf_vport(esw, index, vport, last) \
+ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_VF)
+
+#define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \
+ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN)
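
A small sketch (illustrative; the function name example_walk_vf_vports is hypothetical) of iterating only VF vports with these marked-xarray helpers, bounded by the currently configured number of VFs:

static void example_walk_vf_vports(struct mlx5_eswitch *esw)
{
	struct mlx5_vport *vport;
	unsigned long i;

	/* VF vports occupy indices 1..num_vfs, so num_vfs is the last index. */
	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
		esw_debug(esw->dev, "VF vport %u enabled %d\n",
			  vport->vport, vport->enabled);
}
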
+
+struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink);
+struct mlx5_vport *__must_check
+mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
+
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
+
+int
+mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
+
+int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+
+int
+esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
+void
+esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
+
+struct esw_vport_tbl_namespace {
+ int max_fte;
+ int max_num_groups;
+ u32 flags;
+};
+
+struct mlx5_vport_tbl_attr {
+ u32 chain;
+ u16 prio;
+ u16 vport;
+ struct esw_vport_tbl_namespace *vport_ns;
+};
+
+struct mlx5_flow_table *
+mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
+void
+mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
+
+struct mlx5_flow_handle *
+esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
+
+int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num);
+void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs);
+
+int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
+
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum);
+void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+
+int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum);
+void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
+
+int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num);
+
+/**
+ * struct mlx5_esw_event_info - Indicates eswitch mode changed/changing.
+ *
+ * @new_mode: New mode of eswitch.
+ */
+struct mlx5_esw_event_info {
+ u16 new_mode;
+};
+
+int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n);
+void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n);
+
+bool mlx5_esw_hold(struct mlx5_core_dev *dev);
+void mlx5_esw_release(struct mlx5_core_dev *dev);
+void mlx5_esw_get(struct mlx5_core_dev *dev);
+void mlx5_esw_put(struct mlx5_core_dev *dev);
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
+void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+
+void esw_vport_change_handle_locked(struct mlx5_vport *vport);
+
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
+static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
+{
+ if (mlx5_esw_allowed(esw))
+ return esw->esw_funcs.num_vfs;
+
+ return 0;
+}
+
+#else /* CONFIG_MLX5_ESWITCH */
+/* eswitch API stubs */
+static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
+static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
+static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {}
+static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
+static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct mlx5_flow_handle *
+esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline unsigned int
+mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
+ u16 vport_num)
+{
+ return vport_num;
+}
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ return 0;
+}
+
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ return 0;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
new file mode 100644
index 000000000..433cdd0a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -0,0 +1,4040 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/indir_table.h"
+#include "esw/acl/ofld.h"
+#include "rdma.h"
+#include "en.h"
+#include "fs_core.h"
+#include "lib/devcom.h"
+#include "lib/eq.h"
+#include "lib/fs_chains.h"
+#include "en_tc.h"
+#include "en/mapping.h"
+#include "devlink.h"
+#include "lag/lag.h"
+
+#define mlx5_esw_for_each_rep(esw, i, rep) \
+ xa_for_each(&((esw)->offloads.vport_reps), i, rep)
+
+#define mlx5_esw_for_each_sf_rep(esw, i, rep) \
+ xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF)
+
+#define mlx5_esw_for_each_vf_rep(esw, index, rep) \
+ mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \
+ rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF)
+
+/* There are two match-all miss flows, one for unicast dst mac and
+ * one for multicast.
+ */
+#define MLX5_ESW_MISS_FLOWS (2)
+#define UPLINK_REP_INDEX 0
+
+#define MLX5_ESW_VPORT_TBL_SIZE 128
+#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4
+
+#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+
+static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
+ .max_fte = MLX5_ESW_VPORT_TBL_SIZE,
+ .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
+ .flags = 0,
+};
+
+static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return xa_load(&esw->offloads.vport_reps, vport_num);
+}
+
+static void
+mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) || !attr || !attr->in_rep)
+ return;
+
+ if (attr->int_port) {
+ spec->flow_context.flow_source = mlx5e_tc_int_port_get_flow_source(attr->int_port);
+
+ return;
+ }
+
+ spec->flow_context.flow_source = (attr->in_rep->vport == MLX5_VPORT_UPLINK) ?
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK :
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+}
+
+/* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower
+ * 16 bits are not used in the following process either, so clear them all for
+ * simplicity.
+ */
+void
+mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec)
+{
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ void *misc2;
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
+
+ if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2)))
+ spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2;
+ }
+}
+
+static void
+mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_eswitch *src_esw,
+ u16 vport)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ u32 metadata;
+ void *misc2;
+ void *misc;
+
+	/* Use metadata matching because a vport is not represented by a
+	 * single VHCA in dual-port RoCE mode, and matching on the source
+	 * vport may fail.
+	 */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ vport = mlx5_esw_indir_table_decap_vport(attr);
+
+ if (!attr->chain && esw_attr && esw_attr->int_port)
+ metadata =
+ mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
+ else
+ metadata =
+ mlx5_eswitch_get_vport_metadata_for_match(src_esw, vport);
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, metadata);
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(src_esw->dev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ }
+}
+
+static int
+esw_setup_decap_indir(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_table *ft;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ ft = mlx5_esw_indir_table_get(esw, attr,
+ mlx5_esw_indir_table_decap_vport(attr), true);
+ return PTR_ERR_OR_ZERO(ft);
+}
+
+static void
+esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ mlx5_esw_indir_table_put(esw,
+ mlx5_esw_indir_table_decap_vport(attr),
+ true);
+}
+
+static int
+esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ u32 sampler_id,
+ int i)
+{
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+ dest[i].sampler_id = sampler_id;
+
+ return 0;
+}
+
+static int
+esw_setup_ft_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ int i)
+{
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = attr->dest_ft;
+
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ return esw_setup_decap_indir(esw, attr);
+ return 0;
+}
+
+static void
+esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_fs_chains *chains, int i)
+{
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
+}
+
+static void
+esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, int i)
+{
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level))
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = esw->fdb_table.offloads.slow_fdb;
+}
+
+static int
+esw_setup_chain_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level,
+ int i)
+{
+ struct mlx5_flow_table *ft;
+
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ ft = mlx5_chains_get_table(chains, chain, prio, level);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = ft;
+ return 0;
+}
+
+static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
+ int from, int to)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ int i;
+
+ for (i = from; i < to; i++)
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
+ esw_attr->dests[i].mdev))
+ mlx5_esw_indir_table_put(esw, esw_attr->dests[i].rep->vport,
+ false);
+}
+
+static bool
+esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr)
+{
+ int i;
+
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ return true;
+ return false;
+}
+
+static int
+esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_fs_chains *chains,
+ struct mlx5_flow_attr *attr,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int err;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ /* flow steering cannot handle more than one dest with the same ft
+ * in a single flow
+ */
+ if (esw_attr->out_count - esw_attr->split_count > 1)
+ return -EOPNOTSUPP;
+
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
+ if (err)
+ return err;
+
+ if (esw_attr->dests[esw_attr->split_count].pkt_reformat) {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat;
+ }
+ (*i)++;
+
+ return 0;
+}
+
+static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
+}
+
+static bool
+esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ bool result = false;
+ int i;
+
+	/* The indirect table is supported only for flows whose in_port is the
+	 * uplink and whose destination is a vport on the same eswitch as the
+	 * uplink; return false if at least one of the destinations doesn't
+	 * meet this criterion.
+	 */
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) {
+ if (esw_attr->dests[i].rep &&
+ mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
+ esw_attr->dests[i].mdev)) {
+ result = true;
+ } else {
+ result = false;
+ break;
+ }
+ }
+ return result;
+}
+
+static int
+esw_setup_indir_table(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ bool ignore_flow_lvl,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int j, err;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
+ if (ignore_flow_lvl)
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ dest[*i].ft = mlx5_esw_indir_table_get(esw, attr,
+ esw_attr->dests[j].rep->vport, false);
+ if (IS_ERR(dest[*i].ft)) {
+ err = PTR_ERR(dest[*i].ft);
+ goto err_indir_tbl_get;
+ }
+ }
+
+ if (mlx5_esw_indir_table_decap_vport(attr)) {
+ err = esw_setup_decap_indir(esw, attr);
+ if (err)
+ goto err_indir_tbl_get;
+ }
+
+ return 0;
+
+err_indir_tbl_get:
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
+ return err;
+}
+
+static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
+ esw_cleanup_decap_indir(esw, attr);
+}
+
+static void
+esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level)
+{
+ mlx5_chains_put_table(chains, chain, prio, level);
+}
+
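+/* Fill a single VPORT destination from esw_attr->dests[attr_idx]: add the
+ * destination vhca_id on merged-eswitch devices, turn the uplink vport into
+ * an UPLINK destination when MPESW lag is active, and attach the
+ * per-destination packet reformat when a valid encap is present.
+ */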
+static void
+esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
+ int attr_idx, int dest_idx, bool pkt_reformat)
+{
+ dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ dest[dest_idx].vport.vhca_id =
+ MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
+ dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK &&
+ mlx5_lag_mpesw_is_activated(esw->dev))
+ dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
+ }
+ if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) {
+ if (pkt_reformat) {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
+ }
+ dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
+ }
+}
+
+static int
+esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
+ int i)
+{
+ int j;
+
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++)
+ esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true);
+ return i;
+}
+
+static bool
+esw_src_port_rewrite_supported(struct mlx5_eswitch *esw)
+{
+ return MLX5_CAP_GEN(esw->dev, reg_c_preserve) &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level);
+}
+
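+/* Translate the flow attributes into the destination array: sampler, slow
+ * path and accept flags are handled first, then indirect tables and chain
+ * source port rewrite, and otherwise plain vport destinations optionally
+ * followed by a forward flow table or a destination chain.
+ */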
+static int
+esw_setup_dests(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ int err = 0;
+
+ if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
+ esw_src_port_rewrite_supported(esw))
+ attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE;
+
+ if (attr->flags & MLX5_ATTR_FLAG_SAMPLE &&
+ !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) {
+ esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i);
+ (*i)++;
+ } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) {
+ esw_setup_slow_path_dest(dest, flow_act, esw, *i);
+ (*i)++;
+ } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) {
+ esw_setup_accept_dest(dest, flow_act, chains, *i);
+ (*i)++;
+ } else if (esw_is_indir_table(esw, attr)) {
+ err = esw_setup_indir_table(dest, flow_act, esw, attr, true, i);
+ } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
+ err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
+ } else {
+ *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i);
+
+ if (attr->dest_ft) {
+ err = esw_setup_ft_dest(dest, flow_act, esw, attr, *i);
+ (*i)++;
+ } else if (attr->dest_chain) {
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
+ 1, 0, *i);
+ (*i)++;
+ }
+ }
+
+ return err;
+}
+
+static void
+esw_cleanup_dests(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+
+ if (attr->dest_ft) {
+ esw_cleanup_decap_indir(esw, attr);
+ } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
+ if (attr->dest_chain)
+ esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
+ else if (esw_is_indir_table(esw, attr))
+ esw_cleanup_indir_table(esw, attr);
+ else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
+ esw_cleanup_chain_src_port_rewrite(esw, attr);
+ }
+}
+
+static void
+esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = attr->meter_attr.meter;
+ flow_act->exe_aso.type = attr->exe_aso_type;
+ flow_act->exe_aso.object_id = meter->obj_id;
+ flow_act->exe_aso.flow_meter.meter_idx = meter->idx;
+ flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN;
+ /* use metadata reg 5 for packet color */
+ flow_act->exe_aso.return_reg_id = 5;
+}
+
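+/* Add an offloaded FDB rule: build the flow action (vlan push entries are
+ * set only when the device supports vlan actions natively), resolve the
+ * destinations and optional flow counter, then install the rule either
+ * through a termination table or directly in the per-vport mirror table
+ * (split rules) or the chains table.
+ */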
+struct mlx5_flow_handle *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ bool split = !!(esw_attr->split_count);
+ struct mlx5_vport_tbl_attr fwd_attr;
+ struct mlx5_flow_destination *dest;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_table *fdb;
+ int i = 0;
+
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
+ if (!dest)
+ return ERR_PTR(-ENOMEM);
+
+ flow_act.action = attr->action;
+ /* if per-flow vlan pop/push is emulated, don't program it into the firmware */
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]);
+ flow_act.vlan[0].vid = esw_attr->vlan_vid[0];
+ flow_act.vlan[0].prio = esw_attr->vlan_prio[0];
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]);
+ flow_act.vlan[1].vid = esw_attr->vlan_vid[1];
+ flow_act.vlan[1].prio = esw_attr->vlan_prio[1];
+ }
+ }
+
+ mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ int err;
+
+ err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_create_goto_table;
+ }
+ }
+
+ if (esw_attr->decap_pkt_reformat)
+ flow_act.pkt_reformat = esw_attr->decap_pkt_reformat;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[i].counter_id = mlx5_fc_id(attr->counter);
+ i++;
+ }
+
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+ if (attr->inner_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ flow_act.modify_hdr = attr->modify_hdr;
+
+ if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
+ attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)
+ esw_setup_meter(attr, &flow_act);
+
+ if (split) {
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+
+ fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
+ } else {
+ if (attr->chain || attr->prio)
+ fdb = mlx5_chains_get_table(chains, attr->chain,
+ attr->prio, 0);
+ else
+ fdb = attr->ft;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT))
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr,
+ esw_attr->in_mdev->priv.eswitch,
+ esw_attr->in_rep->vport);
+ }
+ if (IS_ERR(fdb)) {
+ rule = ERR_CAST(fdb);
+ goto err_esw_get;
+ }
+
+ if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
+ rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
+ &flow_act, dest, i);
+ else
+ rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
+ if (IS_ERR(rule))
+ goto err_add_rule;
+ else
+ atomic64_inc(&esw->offloads.num_flows);
+
+ kfree(dest);
+ return rule;
+
+err_add_rule:
+ if (split)
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ else if (attr->chain || attr->prio)
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+err_esw_get:
+ esw_cleanup_dests(esw, attr);
+err_create_goto_table:
+ kfree(dest);
+ return rule;
+}
+
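+/* Add a two-stage mirroring rule: the rule in the fast FDB carries the
+ * split (mirror) vport destinations plus a forward to the per-vport table,
+ * which holds the second stage of the split rule.
+ */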
+struct mlx5_flow_handle *
+mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5_vport_tbl_attr fwd_attr;
+ struct mlx5_flow_destination *dest;
+ struct mlx5_flow_table *fast_fdb;
+ struct mlx5_flow_table *fwd_fdb;
+ struct mlx5_flow_handle *rule;
+ int i, err = 0;
+
+ dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
+ if (!dest)
+ return ERR_PTR(-ENOMEM);
+
+ fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
+ if (IS_ERR(fast_fdb)) {
+ rule = ERR_CAST(fast_fdb);
+ goto err_get_fast;
+ }
+
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
+ if (IS_ERR(fwd_fdb)) {
+ rule = ERR_CAST(fwd_fdb);
+ goto err_get_fwd;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ for (i = 0; i < esw_attr->split_count; i++) {
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ /* Source port rewrite (forward to ovs internal port or stack device) isn't
+ * supported in a rule with a split action.
+ */
+ err = -EOPNOTSUPP;
+ else
+ esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
+
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_chain_src_rewrite;
+ }
+ }
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = fwd_fdb;
+ i++;
+
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr,
+ esw_attr->in_mdev->priv.eswitch,
+ esw_attr->in_rep->vport);
+
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
+
+ if (IS_ERR(rule)) {
+ i = esw_attr->split_count;
+ goto err_chain_src_rewrite;
+ }
+
+ atomic64_inc(&esw->offloads.num_flows);
+
+ kfree(dest);
+ return rule;
+err_chain_src_rewrite:
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+err_get_fwd:
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+err_get_fast:
+ kfree(dest);
+ return rule;
+}
+
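+/* Common teardown for offloaded and fwd rules: delete the rule, release any
+ * termination tables, and drop the per-vport table / chains table references
+ * taken when the rule was added.
+ */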
+static void
+__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr,
+ bool fwd_rule)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ bool split = (esw_attr->split_count > 0);
+ struct mlx5_vport_tbl_attr fwd_attr;
+ int i;
+
+ mlx5_del_flow_rules(rule);
+
+ if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
+ /* unref the term table */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (esw_attr->dests[i].termtbl)
+ mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl);
+ }
+ }
+
+ atomic64_dec(&esw->offloads.num_flows);
+
+ if (fwd_rule || split) {
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ }
+
+ if (fwd_rule) {
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+ } else {
+ if (split)
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ else if (attr->chain || attr->prio)
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+ esw_cleanup_dests(esw, attr);
+ }
+}
+
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ __mlx5_eswitch_del_rule(esw, rule, attr, false);
+}
+
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ __mlx5_eswitch_del_rule(esw, rule, attr, true);
+}
+
+static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ int err = 0;
+
+ esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
+ mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) {
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+ continue;
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+static struct mlx5_eswitch_rep *
+esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
+{
+ struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
+
+ in_rep = attr->in_rep;
+ out_rep = attr->dests[0].rep;
+
+ if (push)
+ vport = in_rep;
+ else if (pop)
+ vport = out_rep;
+ else
+ vport = in_rep;
+
+ return vport;
+}
+
+static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
+ bool push, bool pop, bool fwd)
+{
+ struct mlx5_eswitch_rep *in_rep, *out_rep;
+
+ if ((push || pop) && !fwd)
+ goto out_notsupp;
+
+ in_rep = attr->in_rep;
+ out_rep = attr->dests[0].rep;
+
+ if (push && in_rep->vport == MLX5_VPORT_UPLINK)
+ goto out_notsupp;
+
+ if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
+ goto out_notsupp;
+
+ /* vport has vlan push configured, can't offload VF --> wire rules without it */
+ if (!push && !pop && fwd)
+ if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
+ goto out_notsupp;
+
+ /* protects against (1) setting rules with different vlans to push and
+ * (2) setting rules without vlans (attr->vlan = 0) together with rules
+ * with vlans to push (!= 0)
+ */
+ if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0]))
+ goto out_notsupp;
+
+ return 0;
+
+out_notsupp:
+ return -EOPNOTSUPP;
+}
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_eswitch_rep *vport = NULL;
+ bool push, pop, fwd;
+ int err = 0;
+
+ /* no-op if vlan push/pop is supported natively (non-emulation mode) */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ return 0;
+
+ push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+ pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ !attr->dest_chain);
+
+ mutex_lock(&esw->state_lock);
+
+ err = esw_add_vlan_action_check(esw_attr, push, pop, fwd);
+ if (err)
+ goto unlock;
+
+ attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED;
+
+ vport = esw_vlan_action_get_vport(esw_attr, push, pop);
+
+ if (!push && !pop && fwd) {
+ /* tracks VF --> wire rules without vlan push action */
+ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
+ vport->vlan_refcount++;
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
+ }
+
+ goto unlock;
+ }
+
+ if (!push && !pop)
+ goto unlock;
+
+ if (!(offloads->vlan_push_pop_refcount)) {
+ /* it's the 1st vlan rule, apply global vlan pop policy */
+ err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ }
+ offloads->vlan_push_pop_refcount++;
+
+ if (push) {
+ if (vport->vlan_refcount)
+ goto skip_set_push;
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0],
+ 0, SET_VLAN_INSERT | SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ vport->vlan = esw_attr->vlan_vid[0];
+skip_set_push:
+ vport->vlan_refcount++;
+ }
+out:
+ if (!err)
+ attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_eswitch_rep *vport = NULL;
+ bool push, pop, fwd;
+ int err = 0;
+
+ /* no-op if vlan push/pop is supported natively (non-emulation mode) */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ return 0;
+
+ if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED))
+ return 0;
+
+ push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+ pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+ mutex_lock(&esw->state_lock);
+
+ vport = esw_vlan_action_get_vport(esw_attr, push, pop);
+
+ if (!push && !pop && fwd) {
+ /* tracks VF --> wire rules without vlan push action */
+ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
+ vport->vlan_refcount--;
+
+ goto out;
+ }
+
+ if (push) {
+ vport->vlan_refcount--;
+ if (vport->vlan_refcount)
+ goto skip_unset_push;
+
+ vport->vlan = 0;
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
+ 0, 0, SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ }
+
+skip_unset_push:
+ offloads->vlan_push_pop_refcount--;
+ if (offloads->vlan_push_pop_refcount)
+ goto out;
+
+ /* no more vlan rules, stop global vlan pop policy */
+ err = esw_set_global_vlan_pop(esw, 0);
+
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
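+/* Slow-path rule matching traffic sent by the eswitch manager on a given
+ * SQ (source_sqn + source_port, plus the vhca_id on merged eswitches) and
+ * forwarding it to the representor's vport.
+ */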
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch *from_esw,
+ struct mlx5_eswitch_rep *rep,
+ u32 sqn)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ flow_rule = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
+ /* source vport is the esw manager */
+ MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
+ if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(from_esw->dev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = rep->vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) &&
+ rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
+ flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule))
+ esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n",
+ PTR_ERR(flow_rule));
+out:
+ kvfree(spec);
+ return flow_rule;
+}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
+
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule)
+{
+ if (rule)
+ mlx5_del_flow_rules(rule);
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(fte_match_param, spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1,
+ ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+ dest.vport.num = vport_num;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n",
+ vport_num, PTR_ERR(flow_rule));
+
+ kvfree(spec);
+ return flow_rule;
+}
+
+static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
+{
+ return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_1;
+}
+
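+/* Enable or disable copying REG_C_0 (and REG_C_1 when reg_c1 loopback is
+ * supported) from the FDB to vports via the esw vport context
+ * fdb_to_vport_reg_c_id field.
+ */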
+static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 min[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+ u8 curr, wanted;
+ int err;
+
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw) &&
+ !mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return 0;
+
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ err = mlx5_cmd_exec_inout(esw->dev, query_esw_vport_context, in, out);
+ if (err)
+ return err;
+
+ curr = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+ wanted = MLX5_FDB_TO_VPORT_REG_C_0;
+ if (mlx5_eswitch_reg_c1_loopback_supported(esw))
+ wanted |= MLX5_FDB_TO_VPORT_REG_C_1;
+
+ if (enable)
+ curr |= wanted;
+ else
+ curr &= ~wanted;
+
+ MLX5_SET(modify_esw_vport_context_in, min,
+ esw_vport_context.fdb_to_vport_reg_c_id, curr);
+ MLX5_SET(modify_esw_vport_context_in, min,
+ field_select.fdb_to_vport_reg_c_id, 1);
+
+ err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, min);
+ if (!err) {
+ if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1))
+ esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
+ else
+ esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
+ }
+
+ return err;
+}
+
+static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_destination *dest)
+{
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ }
+
+ dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest->vport.num = peer_dev->priv.eswitch->manager_vport;
+ dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
+ dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+}
+
+static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw,
+ struct mlx5_flow_spec *spec,
+ u16 vport)
+{
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
+ vport));
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ }
+}
+
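+/* Install one peer miss rule per function vport of the peer device (PF,
+ * ECPF and VFs), steering traffic sourced from that vport to the peer's
+ * eswitch manager vport.
+ */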
+static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle **flows;
+ /* total vports is the same for both e-switches */
+ int nvports = esw->total_vports;
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_vport *vport;
+ unsigned long i;
+ void *misc;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ peer_miss_rules_setup(esw, peer_dev, spec, &dest);
+
+ flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL);
+ if (!flows) {
+ err = -ENOMEM;
+ goto alloc_flows_err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
+ spec, MLX5_VPORT_PF);
+
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto add_pf_flow_err;
+ }
+ flows[vport->index] = flow;
+ }
+
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto add_ecpf_flow_err;
+ }
+ flows[vport->index] = flow;
+ }
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
+ esw_set_peer_miss_rule_source_port(esw,
+ peer_dev->priv.eswitch,
+ spec, vport->vport);
+
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto add_vf_flow_err;
+ }
+ flows[vport->index] = flow;
+ }
+
+ esw->fdb_table.offloads.peer_miss_rules = flows;
+
+ kvfree(spec);
+ return 0;
+
+add_vf_flow_err:
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
+ if (!flows[vport->index])
+ continue;
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+add_ecpf_flow_err:
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+add_pf_flow_err:
+ esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
+ kvfree(flows);
+alloc_flows_err:
+ kvfree(spec);
+ return err;
+}
+
+static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_handle **flows;
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ flows = esw->fdb_table.offloads.peer_miss_rules;
+
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
+ mlx5_del_flow_rules(flows[vport->index]);
+
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ kvfree(flows);
+}
+
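+/* Add the FDB miss rules: one for unicast and one for multicast (dmac
+ * multicast bit set), both forwarding unmatched traffic to the eswitch
+ * manager vport.
+ */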
+static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
+ int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = esw->manager_vport;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
+out:
+ kvfree(spec);
+ return err;
+}
+
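+/* Restore rule: match the given tag in the user-data bits of REG_C_0, copy
+ * REG_C_1 into REG_B via the restore modify header, set the flow tag and
+ * forward to the offloads table.
+ */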
+struct mlx5_flow_handle *
+esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
+{
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore;
+ struct mlx5_flow_context *flow_context;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_destination dest;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ ESW_REG_C0_USER_DATA_METADATA_MASK);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id;
+
+ flow_context = &spec->flow_context;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = tag;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = esw->offloads.ft_offloads;
+
+ flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev,
+ "Failed to create restore rule for tag: %d, err(%d)\n",
+ tag, (int)PTR_ERR(flow_rule));
+
+ return flow_rule;
+}
+
+#define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
+
+static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+ u32 *flow_group_in)
+{
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ } else {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ }
+}
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport_tbl_attr attr;
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ attr.chain = 0;
+ attr.prio = 1;
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ attr.vport = vport->vport;
+ attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ mlx5_esw_vporttbl_put(esw, &attr);
+ }
+}
+
+static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport_tbl_attr attr;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ attr.chain = 0;
+ attr.prio = 1;
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ attr.vport = vport->vport;
+ attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
+ fdb = mlx5_esw_vporttbl_get(esw, &attr);
+ if (IS_ERR(fdb))
+ goto out;
+ }
+ return 0;
+
+out:
+ esw_vport_tbl_put(esw);
+ return PTR_ERR(fdb);
+}
+
+#define fdb_modify_header_fwd_to_table_supported(esw) \
+ (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
+static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level))
+ *flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
+ } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Tc chains and priorities offload aren't supported\n");
+ } else if (!fdb_modify_header_fwd_to_table_supported(esw)) {
+ /* Disabled when the ttl workaround is needed, e.g.
+ * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig
+ */
+ esw_warn(dev,
+ "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n");
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ } else {
+ *flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_info(dev, "Supported tc chains and prios offload\n");
+ }
+
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ *flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED;
+}
+
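+/* Create the FDB chains object: get the end-of-TC table and the chain 0
+ * root table, and take the per-vport tables up front when chains/prios
+ * aren't supported.
+ */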
+static int
+esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_table *nf_ft, *ft;
+ struct mlx5_chains_attr attr = {};
+ struct mlx5_fs_chains *chains;
+ u32 fdb_max;
+ int err;
+
+ fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
+
+ esw_init_chains_offload_flags(esw, &attr.flags);
+ attr.ns = MLX5_FLOW_NAMESPACE_FDB;
+ attr.max_ft_sz = fdb_max;
+ attr.max_grp_num = esw->params.large_group_num;
+ attr.default_ft = miss_fdb;
+ attr.mapping = esw->offloads.reg_c0_obj_pool;
+
+ chains = mlx5_chains_create(dev, &attr);
+ if (IS_ERR(chains)) {
+ err = PTR_ERR(chains);
+ esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
+ return err;
+ }
+
+ esw->fdb_table.offloads.esw_chains_priv = chains;
+
+ /* Create tc_end_ft, the flow table of the always-created end chain */
+ nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains),
+ 1, 0);
+ if (IS_ERR(nf_ft)) {
+ err = PTR_ERR(nf_ft);
+ goto nf_ft_err;
+ }
+
+ /* Always open the root for fast path */
+ ft = mlx5_chains_get_table(chains, 0, 1, 0);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto level_0_err;
+ }
+
+ /* Open level 1 for split fdb rules now if prios aren't supported */
+ if (!mlx5_chains_prios_supported(chains)) {
+ err = esw_vport_tbl_get(esw);
+ if (err)
+ goto level_1_err;
+ }
+
+ mlx5_chains_set_end_ft(chains, nf_ft);
+
+ return 0;
+
+level_1_err:
+ mlx5_chains_put_table(chains, 0, 1, 0);
+level_0_err:
+ mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
+nf_ft_err:
+ mlx5_chains_destroy(chains);
+ esw->fdb_table.offloads.esw_chains_priv = NULL;
+
+ return err;
+}
+
+static void
+esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
+{
+ if (!mlx5_chains_prios_supported(chains))
+ esw_vport_tbl_put(esw);
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
+ mlx5_chains_destroy(chains);
+}
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static int
+esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
+{ return 0; }
+
+static void
+esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
+{}
+
+#endif
+
+static int
+esw_create_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int count, err = 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
+
+ /* See comment at table_size calculation */
+ count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1);
+ *ix += count;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.offloads.send_to_vport_grp = g;
+
+out:
+ return err;
+}
+
+static int
+esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!esw_src_port_rewrite_supported(esw))
+ return 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ end_flow_index, *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev,
+ "Failed to create send-to-vport meta flow group err(%d)\n", err);
+ goto send_vport_meta_err;
+ }
+ esw->fdb_table.offloads.send_to_vport_meta_grp = g;
+
+ return 0;
+
+send_vport_meta_err:
+ return err;
+}
+
+static int
+esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ esw_set_flow_group_source_port(esw, flow_group_in);
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.offloads.peer_miss_grp = g;
+
+out:
+ return err;
+}
+
+static int
+esw_create_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+ u8 *dmac;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ *ix + MLX5_ESW_MISS_FLOWS);
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err);
+ goto miss_err;
+ }
+ esw->fdb_table.offloads.miss_grp = g;
+
+ err = esw_add_fdb_miss_rule(esw);
+ if (err)
+ goto miss_rule_err;
+
+ return 0;
+
+miss_rule_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+miss_err:
+ return err;
+}
+
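+/* Create the slow path FDB and its flow groups (send-to-vport,
+ * send-to-vport metadata when source rewrite is supported, peer miss on
+ * merged eswitch, and miss), plus the TC-miss table and the FDB chains.
+ */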
+static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb = NULL;
+ int table_size, ix = 0, err = 0;
+ u32 flags = 0, *flow_group_in;
+
+ esw_debug(esw->dev, "Create offloads FDB Tables\n");
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ err = -EOPNOTSUPP;
+ goto ns_err;
+ }
+ esw->fdb_table.offloads.ns = root_ns;
+ err = mlx5_flow_namespace_set_mode(root_ns,
+ esw->dev->priv.steering->mode);
+ if (err) {
+ esw_warn(dev, "Failed to set FDB namespace steering mode\n");
+ goto ns_err;
+ }
+
+ /* To be strictly correct:
+ *	MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+ * should be:
+ *	esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ *	peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+ * but as the peer device might not be in switchdev mode it's not
+ * possible. We use the fact that by default FW sets max vfs and max sfs
+ * to the same value on both devices. If this needs to change in the
+ * future, note that the peer miss group should also be created based on
+ * the number of total vports of the peer (currently it also uses
+ * esw->total_vports).
+ */
+ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
+ esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS;
+
+ /* create the slow path fdb with encap set, so further table instances
+ * can be created at run time while VFs are probed, if the FW allows it.
+ */
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ ft_attr.flags = flags;
+ ft_attr.max_fte = table_size;
+ ft_attr.prio = FDB_SLOW_PATH;
+
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
+ goto slow_fdb_err;
+ }
+ esw->fdb_table.offloads.slow_fdb = fdb;
+
+ /* Create an empty, TC-miss managed table. This allows plugging in the
+ * following priorities without directly exposing their level 0 table to
+ * eswitch_offloads; instead this table is passed as miss_fdb to the
+ * following call to esw_chains_create().
+ */
+ memset(&ft_attr, 0, sizeof(ft_attr));
+ ft_attr.prio = FDB_TC_MISS;
+ esw->fdb_table.offloads.tc_miss_table = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(esw->fdb_table.offloads.tc_miss_table)) {
+ err = PTR_ERR(esw->fdb_table.offloads.tc_miss_table);
+ esw_warn(dev, "Failed to create TC miss FDB Table err %d\n", err);
+ goto tc_miss_table_err;
+ }
+
+ err = esw_chains_create(esw, esw->fdb_table.offloads.tc_miss_table);
+ if (err) {
+ esw_warn(dev, "Failed to open fdb chains err(%d)\n", err);
+ goto fdb_chains_err;
+ }
+
+ err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto send_vport_err;
+
+ err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto send_vport_meta_err;
+
+ err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto peer_miss_err;
+
+ err = esw_create_miss_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto miss_err;
+
+ kvfree(flow_group_in);
+ return 0;
+
+miss_err:
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+peer_miss_err:
+ if (esw->fdb_table.offloads.send_to_vport_meta_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
+send_vport_meta_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+send_vport_err:
+ esw_chains_destroy(esw, esw_chains(esw));
+fdb_chains_err:
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
+tc_miss_table_err:
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
+slow_fdb_err:
+ /* Holds true only as long as DMFS is the default */
+ mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS);
+ns_err:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
+{
+ if (!esw->fdb_table.offloads.slow_fdb)
+ return;
+
+ esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ if (esw->fdb_table.offloads.send_to_vport_meta_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+
+ esw_chains_destroy(esw, esw_chains(esw));
+
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
+ /* Holds true only as long as DMFS is the default */
+ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
+ MLX5_FLOW_STEERING_MODE_DMFS);
+ atomic64_set(&esw->user_count, 0);
+}
+
+static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw)
+{
+ int nvports;
+
+ nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
+ if (mlx5e_tc_int_port_supported(esw))
+ nvports += MLX5E_TC_MAX_INT_PORT_NUM;
+
+ return nvports;
+}
+
+static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_table *ft_offloads;
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+ if (!ns) {
+ esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.max_fte = esw_get_nr_ft_offloads_steering_src_ports(esw) +
+ MLX5_ESW_FT_OFFLOADS_DROP_RULE;
+ ft_attr.prio = 1;
+
+ ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft_offloads)) {
+ err = PTR_ERR(ft_offloads);
+ esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
+ return err;
+ }
+
+ esw->offloads.ft_offloads = ft_offloads;
+ return 0;
+}
+
+static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+
+ mlx5_destroy_flow_table(offloads->ft_offloads);
+}
+
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ u32 *flow_group_in;
+ int nvports;
+ int err = 0;
+
+ nvports = esw_get_nr_ft_offloads_steering_src_ports(esw);
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ /* create vport rx group */
+ esw_set_flow_group_source_port(esw, flow_group_in);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
+
+ g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
+ goto out;
+ }
+
+ esw->offloads.vport_rx_group = g;
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
+{
+ mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
+}
+
+static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw)
+{
+ /* The ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+ * for the drop rule, which is placed at the end of the table.
+ * So return the total number of vport and int_port steering sources as
+ * the rule index.
+ */
+ return esw_get_nr_ft_offloads_steering_src_ports(esw);
+}
+
+static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ u32 *flow_group_in;
+ int flow_index;
+ int err = 0;
+
+ flow_index = esw_create_vport_rx_drop_rule_index(esw);
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err);
+ goto out;
+ }
+
+ esw->offloads.vport_rx_drop_group = g;
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_group)
+ mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group);
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ flow_rule = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+ &flow_act, dest, 1);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
+ goto out;
+ }
+
+out:
+ kvfree(spec);
+ return flow_rule;
+}
+
+static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL,
+ &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev,
+ "fs offloads: Failed to add vport rx drop rule err %ld\n",
+ PTR_ERR(flow_rule));
+ return PTR_ERR(flow_rule);
+ }
+
+ esw->offloads.vport_rx_drop_rule = flow_rule;
+
+ return 0;
+}
+
+static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_rule)
+ mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule);
+}
+
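+/* Derive the effective min-inline mode: use the device capability when it
+ * is fixed, otherwise query every host function vport and require that they
+ * all agree.
+ */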
+static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
+{
+ u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager))
+ return -EOPNOTSUPP;
+
+ if (!mlx5_esw_is_fdb_created(esw))
+ return -EOPNOTSUPP;
+
+ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ mlx5_mode = MLX5_INLINE_MODE_NONE;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_L2:
+ mlx5_mode = MLX5_INLINE_MODE_L2;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ goto query_vports;
+ }
+
+query_vports:
+ mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode);
+ if (prev_mlx5_mode != mlx5_mode)
+ return -EINVAL;
+ prev_mlx5_mode = mlx5_mode;
+ }
+
+out:
+ *mode = mlx5_mode;
+ return 0;
+}
+
+static void esw_destroy_restore_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
+ return;
+
+ mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id);
+ mlx5_destroy_flow_group(offloads->restore_group);
+ mlx5_destroy_flow_table(offloads->ft_offloads_restore);
+}
+
+static int esw_create_restore_table(struct mlx5_eswitch *esw)
+{
+ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_modify_hdr *mod_hdr;
+ void *match_criteria, *misc;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *g;
+ u32 *flow_group_in;
+ int err = 0;
+
+ if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
+ return 0;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+ if (!ns) {
+ esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ esw_warn(esw->dev, "Failed to create restore table, err %d\n",
+ err);
+ goto out_free;
+ }
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ misc = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ ESW_REG_C0_USER_DATA_METADATA_MASK);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ft_attr.max_fte - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create restore flow group, err: %d\n",
+ err);
+ goto err_group;
+ }
+
+ MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY);
+ MLX5_SET(copy_action_in, modact, src_field,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
+ MLX5_SET(copy_action_in, modact, dst_field,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+ mod_hdr = mlx5_modify_header_alloc(esw->dev,
+ MLX5_FLOW_NAMESPACE_KERNEL, 1,
+ modact);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ esw_warn(dev, "Failed to create restore mod header, err: %d\n",
+ err);
+ goto err_mod_hdr;
+ }
+
+ esw->offloads.ft_offloads_restore = ft;
+ esw->offloads.restore_group = g;
+ esw->offloads.restore_copy_hdr_id = mod_hdr;
+
+ kvfree(flow_group_in);
+
+ return 0;
+
+err_mod_hdr:
+ mlx5_destroy_flow_group(g);
+err_group:
+ mlx5_destroy_flow_table(ft);
+out_free:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static int esw_offloads_start(struct mlx5_eswitch *esw,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ esw->mode = MLX5_ESWITCH_OFFLOADS;
+ err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed setting eswitch to offloads");
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ mlx5_rescan_drivers(esw->dev);
+ }
+ if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
+ if (mlx5_eswitch_inline_mode_get(esw,
+ &esw->offloads.inline_mode)) {
+ esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Inline mode is different between vports");
+ }
+ }
+ return err;
+}
+
+static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ xa_mark_t mark)
+{
+ bool mark_set;
+
+ /* Copy the mark from vport to its rep */
+ mark_set = xa_get_mark(&esw->vports, rep->vport, mark);
+ if (mark_set)
+ xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark);
+}
+
+static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
+{
+ struct mlx5_eswitch_rep *rep;
+ int rep_type;
+ int err;
+
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ rep->vport = vport->vport;
+ rep->vport_index = vport->index;
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+ atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
+
+ err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
+ if (err)
+ goto insert_err;
+
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF);
+ return 0;
+
+insert_err:
+ kfree(rep);
+ return err;
+}
+
+static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep)
+{
+ xa_erase(&esw->offloads.vport_reps, rep->vport);
+ kfree(rep);
+}
+
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ mlx5_esw_for_each_rep(esw, i, rep)
+ mlx5_esw_offloads_rep_cleanup(esw, rep);
+ xa_destroy(&esw->offloads.vport_reps);
+}
+
+int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ xa_init(&esw->offloads.vport_reps);
+
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ err = mlx5_esw_offloads_rep_init(esw, vport);
+ if (err)
+ goto err;
+ }
+ return 0;
+
+err:
+ esw_offloads_cleanup_reps(esw);
+ return err;
+}
+
+static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+ if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
+ REP_LOADED, REP_REGISTERED) == REP_LOADED)
+ esw->offloads.rep_ops[rep_type]->unload(rep);
+}
+
+static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ mlx5_esw_for_each_sf_rep(esw, i, rep)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ __unload_reps_sf_vport(esw, rep_type);
+
+ mlx5_esw_for_each_vf_rep(esw, i, rep)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+ }
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+ }
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_eswitch_rep *rep;
+ int rep_type;
+ int err;
+
+ rep = mlx5_eswitch_get_rep(esw, vport_num);
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+ if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
+ REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
+ err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
+ if (err)
+ goto err_reps;
+ }
+
+ return 0;
+
+err_reps:
+ atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED);
+ for (--rep_type; rep_type >= 0; rep_type--)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+ return err;
+}
+
+void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_eswitch_rep *rep;
+ int rep_type;
+
+ rep = mlx5_eswitch_get_rep(esw, vport_num);
+ for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ int err;
+
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ if (vport_num != MLX5_VPORT_UPLINK) {
+ err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
+ if (err)
+ return err;
+ }
+
+ err = mlx5_esw_offloads_rep_load(esw, vport_num);
+ if (err)
+ goto load_err;
+ return err;
+
+load_err:
+ if (vport_num != MLX5_VPORT_UPLINK)
+ mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+ return err;
+}
+
+void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return;
+
+ mlx5_esw_offloads_rep_unload(esw, vport_num);
+
+ if (vport_num != MLX5_VPORT_UPLINK)
+ mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+}
+
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ int err;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+
+ if (master) {
+ ns = mlx5_get_flow_namespace(master,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_table *acl)
+{
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(slave, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = slave->priv.eswitch->manager_vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+ &dest, 1);
+ if (IS_ERR(flow_rule))
+ err = PTR_ERR(flow_rule);
+ else
+ vport->egress.offloads.bounce_rule = flow_rule;
+
+ kvfree(spec);
+ return err;
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_eswitch *esw = master->priv.eswitch;
+ struct mlx5_flow_table_attr ft_attr = {
+ .max_fte = 1, .prio = 0, .level = 0,
+ .flags = MLX5_FLOW_TABLE_OTHER_VPORT,
+ };
+ struct mlx5_flow_namespace *egress_ns;
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *g;
+ struct mlx5_vport *vport;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->index);
+ if (!egress_ns)
+ return -EINVAL;
+
+ if (vport->egress.acl)
+ return -EINVAL;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ goto out;
+ }
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ goto err_group;
+ }
+
+ err = __esw_set_master_egress_rule(master, slave, vport, acl);
+ if (err)
+ goto err_rule;
+
+ vport->egress.acl = acl;
+ vport->egress.offloads.bounce_grp = g;
+
+ kvfree(flow_group_in);
+
+ return 0;
+
+err_rule:
+ mlx5_destroy_flow_group(g);
+err_group:
+ mlx5_destroy_flow_table(acl);
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+ dev->priv.eswitch->manager_vport);
+
+ esw_acl_egress_ofld_cleanup(vport);
+}
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ int err;
+
+ err = esw_set_slave_root_fdb(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ return err;
+
+ err = esw_set_master_egress_rule(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_acl;
+
+ return err;
+
+err_acl:
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
+ return err;
+}
+
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ esw_unset_master_egress_rule(master_esw->dev);
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+}
+
+#define ESW_OFFLOADS_DEVCOM_PAIR (0)
+#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
+
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
+{
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ rep_type = NUM_REP_TYPES;
+ while (rep_type--) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event)
+ ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+ }
+ }
+}
+
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+{
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+ mlx5e_tc_clean_fdb_peer_flows(esw);
+#endif
+ mlx5_esw_offloads_rep_event_unpair(esw);
+ esw_del_fdb_peer_miss_rules(esw);
+}
+
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw)
+{
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
+ int err;
+
+ err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ if (err)
+ return err;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event) {
+ err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+ if (err)
+ goto err_out;
+ }
+ }
+ }
+
+ return 0;
+
+err_out:
+ mlx5_esw_offloads_unpair(esw);
+ return err;
+}
+
+static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw,
+ bool pair)
+{
+ struct mlx5_flow_root_namespace *peer_ns;
+ struct mlx5_flow_root_namespace *ns;
+ int err;
+
+ peer_ns = peer_esw->dev->priv.steering->fdb_root_ns;
+ ns = esw->dev->priv.steering->fdb_root_ns;
+
+ if (pair) {
+ err = mlx5_flow_namespace_set_peer(ns, peer_ns);
+ if (err)
+ return err;
+
+ err = mlx5_flow_namespace_set_peer(peer_ns, ns);
+ if (err) {
+ mlx5_flow_namespace_set_peer(ns, NULL);
+ return err;
+ }
+ } else {
+ mlx5_flow_namespace_set_peer(ns, NULL);
+ mlx5_flow_namespace_set_peer(peer_ns, NULL);
+ }
+
+ return 0;
+}
+
+static int mlx5_esw_offloads_devcom_event(int event,
+ void *my_data,
+ void *event_data)
+{
+ struct mlx5_eswitch *esw = my_data;
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw = event_data;
+ int err;
+
+ switch (event) {
+ case ESW_OFFLOADS_DEVCOM_PAIR:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
+ mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+ break;
+
+ if (esw->paired[mlx5_get_dev_index(peer_esw->dev)])
+ break;
+
+ err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
+ if (err)
+ goto err_out;
+ err = mlx5_esw_offloads_pair(esw, peer_esw);
+ if (err)
+ goto err_peer;
+
+ err = mlx5_esw_offloads_pair(peer_esw, esw);
+ if (err)
+ goto err_pair;
+
+ esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
+ peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+ break;
+
+ case ESW_OFFLOADS_DEVCOM_UNPAIR:
+ if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
+ break;
+
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+ esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
+ peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
+ mlx5_esw_offloads_unpair(peer_esw);
+ mlx5_esw_offloads_unpair(esw);
+ mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
+ break;
+ }
+
+ return 0;
+
+err_pair:
+ mlx5_esw_offloads_unpair(esw);
+err_peer:
+ mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
+err_out:
+ mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
+ event, err);
+ return err;
+}
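+
+/* Pairing is symmetric: on ESW_OFFLOADS_DEVCOM_PAIR the two FDB root
+ * namespaces are pointed at each other and mlx5_esw_offloads_pair() is
+ * called in both directions before the devcom component is marked paired;
+ * ESW_OFFLOADS_DEVCOM_UNPAIR tears the same state down in reverse order.
+ */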
+
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ INIT_LIST_HEAD(&esw->offloads.peer_flows);
+ mutex_init(&esw->offloads.peer_mutex);
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
+ mlx5_devcom_register_component(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ mlx5_esw_offloads_devcom_event,
+ esw);
+
+ mlx5_devcom_send_event(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_PAIR, esw);
+}
+
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
+ mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
+
+ mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
+bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+{
+ if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+ return false;
+
+ if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_0))
+ return false;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return false;
+
+ return true;
+}
+
+#define MLX5_ESW_METADATA_RSVD_UPLINK 1
+
+/* Share the same metadata for uplinks. This is fine because:
+ * (a) In shared FDB mode (LAG) both uplinks are treated the
+ * same and tagged with the same metadata.
+ * (b) In non-shared FDB mode, packets from physical port0
+ * cannot hit the eswitch of PF1 and vice versa.
+ */
+static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw)
+{
+ return MLX5_ESW_METADATA_RSVD_UPLINK;
+}
+
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
+{
+ u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
+ /* Reserve 0xf for internal port offload */
+ u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2;
+ u32 pf_num;
+ int id;
+
+ /* Only 4 bits of pf_num */
+ pf_num = mlx5_get_dev_index(esw->dev);
+ if (pf_num > max_pf_num)
+ return 0;
+
+ /* Metadata is 4 bits of PFNUM and 12 bits of unique id */
+ /* Use only non-zero vport_id (2-4095) for all PFs */
+ id = ida_alloc_range(&esw->offloads.vport_metadata_ida,
+ MLX5_ESW_METADATA_RSVD_UPLINK + 1,
+ vport_end_ida, GFP_KERNEL);
+ if (id < 0)
+ return 0;
+ id = (pf_num << ESW_VPORT_BITS) | id;
+ return id;
+}
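+
+/* Illustration of the encoding above (bit widths as stated in the comments,
+ * i.e. 4 bits of PFNUM and 12 bits of unique id): pf_num 1 with ida id 2
+ * yields (1 << 12) | 2 = 0x1002, while the uplink vport always uses the
+ * reserved value MLX5_ESW_METADATA_RSVD_UPLINK (1) and is never allocated
+ * from the ida.
+ */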
+
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
+{
+ u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1;
+
+ /* Metadata contains only 12 bits of actual ida id */
+ ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask);
+}
+
+static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ vport->default_metadata = mlx5_esw_match_metadata_reserved(esw);
+ else
+ vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
+
+ vport->metadata = vport->default_metadata;
+ return vport->metadata ? 0 : -ENOSPC;
+}
+
+static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->default_metadata)
+ return;
+
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ return;
+
+ WARN_ON(vport->metadata != vport->default_metadata);
+ mlx5_esw_match_metadata_free(esw, vport->default_metadata);
+}
+
+static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return;
+
+ mlx5_esw_for_each_vport(esw, i, vport)
+ esw_offloads_vport_metadata_cleanup(esw, vport);
+}
+
+static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return 0;
+
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ err = esw_offloads_vport_metadata_setup(esw, vport);
+ if (err)
+ goto metadata_err;
+ }
+
+ return 0;
+
+metadata_err:
+ esw_offloads_metadata_uninit(esw);
+ return err;
+}
+
+int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable)
+{
+ int err = 0;
+
+ down_write(&esw->mode_lock);
+ if (mlx5_esw_is_fdb_created(esw)) {
+ err = -EBUSY;
+ goto done;
+ }
+ if (!mlx5_esw_vport_match_metadata_supported(esw)) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+ if (enable)
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+ else
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
+done:
+ up_write(&esw->mode_lock);
+ return err;
+}
+
+int
+esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int err;
+
+ err = esw_acl_ingress_ofld_setup(esw, vport);
+ if (err)
+ return err;
+
+ err = esw_acl_egress_ofld_setup(esw, vport);
+ if (err)
+ goto egress_err;
+
+ return 0;
+
+egress_err:
+ esw_acl_ingress_ofld_cleanup(esw, vport);
+ return err;
+}
+
+void
+esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ esw_acl_egress_ofld_cleanup(vport);
+ esw_acl_ingress_ofld_cleanup(esw, vport);
+}
+
+static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ return esw_vport_create_offloads_acl_tables(esw, vport);
+}
+
+static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ if (IS_ERR(vport))
+ return;
+
+ esw_vport_destroy_offloads_acl_tables(esw, vport);
+}
+
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ int ret;
+
+ if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+ return 0;
+
+ ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+ if (ret)
+ return ret;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
+ mlx5_esw_offloads_rep_load(esw, rep->vport);
+ }
+
+ return 0;
+}
+
+static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_indir_table *indir;
+ int err;
+
+ memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
+ mutex_init(&esw->fdb_table.offloads.vports.lock);
+ hash_init(esw->fdb_table.offloads.vports.table);
+ atomic64_set(&esw->user_count, 0);
+
+ indir = mlx5_esw_indir_table_init();
+ if (IS_ERR(indir)) {
+ err = PTR_ERR(indir);
+ goto create_indir_err;
+ }
+ esw->fdb_table.offloads.indir = indir;
+
+ err = esw_create_uplink_offloads_acl_tables(esw);
+ if (err)
+ goto create_acl_err;
+
+ err = esw_create_offloads_table(esw);
+ if (err)
+ goto create_offloads_err;
+
+ err = esw_create_restore_table(esw);
+ if (err)
+ goto create_restore_err;
+
+ err = esw_create_offloads_fdb_tables(esw);
+ if (err)
+ goto create_fdb_err;
+
+ err = esw_create_vport_rx_group(esw);
+ if (err)
+ goto create_fg_err;
+
+ err = esw_create_vport_rx_drop_group(esw);
+ if (err)
+ goto create_rx_drop_fg_err;
+
+ err = esw_create_vport_rx_drop_rule(esw);
+ if (err)
+ goto create_rx_drop_rule_err;
+
+ return 0;
+
+create_rx_drop_rule_err:
+ esw_destroy_vport_rx_drop_group(esw);
+create_rx_drop_fg_err:
+ esw_destroy_vport_rx_group(esw);
+create_fg_err:
+ esw_destroy_offloads_fdb_tables(esw);
+create_fdb_err:
+ esw_destroy_restore_table(esw);
+create_restore_err:
+ esw_destroy_offloads_table(esw);
+create_offloads_err:
+ esw_destroy_uplink_offloads_acl_tables(esw);
+create_acl_err:
+ mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
+create_indir_err:
+ mutex_destroy(&esw->fdb_table.offloads.vports.lock);
+ return err;
+}
+
+static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
+{
+ esw_destroy_vport_rx_drop_rule(esw);
+ esw_destroy_vport_rx_drop_group(esw);
+ esw_destroy_vport_rx_group(esw);
+ esw_destroy_offloads_fdb_tables(esw);
+ esw_destroy_restore_table(esw);
+ esw_destroy_offloads_table(esw);
+ esw_destroy_uplink_offloads_acl_tables(esw);
+ mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
+ mutex_destroy(&esw->fdb_table.offloads.vports.lock);
+}
+
+static void
+esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
+{
+ struct devlink *devlink;
+ bool host_pf_disabled;
+ u16 new_num_vfs;
+
+ new_num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
+ host_pf_disabled = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_pf_disabled);
+
+ if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
+ return;
+
+ devlink = priv_to_devlink(esw->dev);
+ devl_lock(devlink);
+ /* Number of VFs can only change from "0 to x" or "x to 0". */
+ if (esw->esw_funcs.num_vfs > 0) {
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+ } else {
+ int err;
+
+ err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs,
+ MLX5_VPORT_UC_ADDR_CHANGE);
+ if (err) {
+ devl_unlock(devlink);
+ return;
+ }
+ }
+ esw->esw_funcs.num_vfs = new_num_vfs;
+ devl_unlock(devlink);
+}
+
+static void esw_functions_changed_event_handler(struct work_struct *work)
+{
+ struct mlx5_host_work *host_work;
+ struct mlx5_eswitch *esw;
+ const u32 *out;
+
+ host_work = container_of(work, struct mlx5_host_work, work);
+ esw = host_work->esw;
+
+ out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(out))
+ goto out;
+
+ esw_vfs_changed_event_handler(esw, out);
+ kvfree(out);
+out:
+ kfree(host_work);
+}
+
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_esw_functions *esw_funcs;
+ struct mlx5_host_work *host_work;
+ struct mlx5_eswitch *esw;
+
+ host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
+ if (!host_work)
+ return NOTIFY_DONE;
+
+ esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb);
+ esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs);
+
+ host_work->esw = esw;
+
+ INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
+ queue_work(esw->work_queue, &host_work->work);
+
+ return NOTIFY_OK;
+}
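+
+/* The notifier above may run in atomic context (hence GFP_ATOMIC), so it
+ * only queues work; the actual mlx5_esw_query_functions() call and the VF
+ * vport load/unload happen later in esw_functions_changed_event_handler()
+ * on esw->work_queue.
+ */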
+
+static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
+{
+ const u32 *query_host_out;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return 0;
+
+ query_host_out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(query_host_out))
+ return PTR_ERR(query_host_out);
+
+ /* Mark a non-local controller with a non-zero controller number. */
+ esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out,
+ host_params_context.host_number);
+ kvfree(query_host_out);
+ return 0;
+}
+
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller)
+{
+ /* Local controller is always valid */
+ if (controller == 0)
+ return true;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return false;
+
+ /* External host number starts with zero in device */
+ return (controller == esw->offloads.host_number + 1);
+}
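+
+/* For illustration: controller 0 always means the local controller, while
+ * an external host reported by the device as host_number 0 is exposed as
+ * controller 1, host_number 1 as controller 2, and so on.
+ */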
+
+int esw_offloads_enable(struct mlx5_eswitch *esw)
+{
+ struct mapping_ctx *reg_c0_obj_pool;
+ struct mlx5_vport *vport;
+ unsigned long i;
+ u64 mapping_id;
+ int err;
+
+ mutex_init(&esw->offloads.termtbl_mutex);
+ mlx5_rdma_enable_roce(esw->dev);
+
+ err = mlx5_esw_host_number_init(esw);
+ if (err)
+ goto err_metadata;
+
+ err = esw_offloads_metadata_init(esw);
+ if (err)
+ goto err_metadata;
+
+ err = esw_set_passing_vport_metadata(esw, true);
+ if (err)
+ goto err_vport_metadata;
+
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ ESW_REG_C0_USER_DATA_METADATA_MASK,
+ true);
+
+ if (IS_ERR(reg_c0_obj_pool)) {
+ err = PTR_ERR(reg_c0_obj_pool);
+ goto err_pool;
+ }
+ esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool;
+
+ err = esw_offloads_steering_init(esw);
+ if (err)
+ goto err_steering_init;
+
+ /* Representor will control the vport link state */
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN;
+
+ /* Uplink vport rep must load first. */
+ err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK);
+ if (err)
+ goto err_uplink;
+
+ err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
+ if (err)
+ goto err_vports;
+
+ return 0;
+
+err_vports:
+ esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
+err_uplink:
+ esw_offloads_steering_cleanup(esw);
+err_steering_init:
+ mapping_destroy(reg_c0_obj_pool);
+err_pool:
+ esw_set_passing_vport_metadata(esw, false);
+err_vport_metadata:
+ esw_offloads_metadata_uninit(esw);
+err_metadata:
+ mlx5_rdma_disable_roce(esw->dev);
+ mutex_destroy(&esw->offloads.termtbl_mutex);
+ return err;
+}
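+
+/* The enable sequence above is order-sensitive: vport metadata and the
+ * reg_c0 object pool must exist before steering is initialized, the uplink
+ * representor is loaded before any PF/VF vport, and the error path unwinds
+ * in exactly the reverse order.
+ */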
+
+static int esw_offloads_stop(struct mlx5_eswitch *esw,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ esw->mode = MLX5_ESWITCH_LEGACY;
+
+ /* If changing from switchdev to legacy mode without SR-IOV enabled,
+ * there is no need to create the legacy FDB.
+ */
+ if (!mlx5_sriov_is_enabled(esw->dev))
+ return 0;
+
+ err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
+
+ return err;
+}
+
+void esw_offloads_disable(struct mlx5_eswitch *esw)
+{
+ mlx5_eswitch_disable_pf_vf_vports(esw);
+ esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
+ esw_set_passing_vport_metadata(esw, false);
+ esw_offloads_steering_cleanup(esw);
+ mapping_destroy(esw->offloads.reg_c0_obj_pool);
+ esw_offloads_metadata_uninit(esw);
+ mlx5_rdma_disable_roce(esw->dev);
+ mutex_destroy(&esw->offloads.termtbl_mutex);
+}
+
+static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+{
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ *mlx5_mode = MLX5_ESWITCH_LEGACY;
+ break;
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ *mlx5_mode = MLX5_ESWITCH_OFFLOADS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
+{
+ switch (mlx5_mode) {
+ case MLX5_ESWITCH_LEGACY:
+ *mode = DEVLINK_ESWITCH_MODE_LEGACY;
+ break;
+ case MLX5_ESWITCH_OFFLOADS:
+ *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
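+
+/* The two helpers above translate between the devlink uAPI eswitch modes
+ * (for example what "devlink dev eswitch set <pci_dev> mode switchdev"
+ * passes down, with <pci_dev> standing for a handle such as
+ * pci/0000:03:00.0) and the internal MLX5_ESWITCH_* values.
+ */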
+
+static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
+{
+ switch (mode) {
+ case DEVLINK_ESWITCH_INLINE_MODE_NONE:
+ *mlx5_mode = MLX5_INLINE_MODE_NONE;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_LINK:
+ *mlx5_mode = MLX5_INLINE_MODE_L2;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
+ *mlx5_mode = MLX5_INLINE_MODE_IP;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
+ *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
+{
+ switch (mlx5_mode) {
+ case MLX5_INLINE_MODE_NONE:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
+ break;
+ case MLX5_INLINE_MODE_L2:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
+ break;
+ case MLX5_INLINE_MODE_IP:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
+ break;
+ case MLX5_INLINE_MODE_TCP_UDP:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink)
+{
+ struct net *devl_net, *netdev_net;
+ struct mlx5_eswitch *esw;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev);
+ devl_net = devlink_net(devlink);
+
+ return net_eq(devl_net, netdev_net);
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ u16 cur_mlx5_mode, mlx5_mode = 0;
+ struct mlx5_eswitch *esw;
+ int err = 0;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ if (esw_mode_from_devlink(mode, &mlx5_mode))
+ return -EINVAL;
+
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV &&
+ !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's.");
+ return -EPERM;
+ }
+
+ mlx5_lag_disable_change(esw->dev);
+ err = mlx5_esw_try_lock(esw);
+ if (err < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
+ goto enable_lag;
+ }
+ cur_mlx5_mode = err;
+ err = 0;
+
+ if (cur_mlx5_mode == mlx5_mode)
+ goto unlock;
+
+ mlx5_eswitch_disable_locked(esw);
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+ if (mlx5_devlink_trap_get_num_active(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change mode while devlink traps are active");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ err = esw_offloads_start(esw, extack);
+ } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
+ err = esw_offloads_stop(esw, extack);
+ mlx5_rescan_drivers(esw->dev);
+ } else {
+ err = -EINVAL;
+ }
+
+unlock:
+ mlx5_esw_unlock(esw);
+enable_lag:
+ mlx5_lag_enable_change(esw->dev);
+ return err;
+}
+
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
+ err = esw_mode_to_devlink(esw->mode, mode);
+ up_write(&esw->mode_lock);
+ return err;
+}
+
+static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_vport *vport;
+ u16 err_vport_num = 0;
+ unsigned long i;
+ int err = 0;
+
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
+ if (err) {
+ err_vport_num = vport->vport;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to set min inline on vport");
+ goto revert_inline_mode;
+ }
+ }
+ return 0;
+
+revert_inline_mode:
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ if (vport->vport == err_vport_num)
+ break;
+ mlx5_modify_nic_vport_min_inline(dev,
+ vport->vport,
+ esw->offloads.inline_mode);
+ }
+ return err;
+}
+
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw;
+ u8 mlx5_mode;
+ int err;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
+
+ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
+ err = 0;
+ goto out;
+ }
+
+ fallthrough;
+ case MLX5_CAP_INLINE_MODE_L2:
+ NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
+ err = -EOPNOTSUPP;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ break;
+ }
+
+ if (atomic64_read(&esw->offloads.num_flows) > 0) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set inline mode when flows are configured");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+ if (err)
+ goto out;
+
+ err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
+ if (err)
+ goto out;
+
+ esw->offloads.inline_mode = mlx5_mode;
+ up_write(&esw->mode_lock);
+ return 0;
+
+out:
+ up_write(&esw->mode_lock);
+ return err;
+}
+
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
+{
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
+ err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+ up_write(&esw->mode_lock);
+ return err;
+}
+
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw;
+ int err = 0;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
+
+ if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
+ (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
+ esw->offloads.encap = encap;
+ goto unlock;
+ }
+
+ if (esw->offloads.encap == encap)
+ goto unlock;
+
+ if (atomic64_read(&esw->offloads.num_flows) > 0) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set encapsulation when flows are configured");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ esw_destroy_offloads_fdb_tables(esw);
+
+ esw->offloads.encap = encap;
+
+ err = esw_create_offloads_fdb_tables(esw);
+
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed re-creating fast FDB table");
+ esw->offloads.encap = !encap;
+ (void)esw_create_offloads_fdb_tables(esw);
+ }
+
+unlock:
+ up_write(&esw->mode_lock);
+ return err;
+}
+
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ down_write(&esw->mode_lock);
+ *encap = esw->offloads.encap;
+ up_write(&esw->mode_lock);
+ return 0;
+}
+
+static bool
+mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ /* Currently, only an ECPF-based device has a representor for the host PF. */
+ if (vport_num == MLX5_VPORT_PF &&
+ !mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return false;
+
+ if (vport_num == MLX5_VPORT_ECPF &&
+ !mlx5_ecpf_vport_exists(esw->dev))
+ return false;
+
+ return true;
+}
+
+void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
+ const struct mlx5_eswitch_rep_ops *ops,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep_data *rep_data;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ esw->offloads.rep_ops[rep_type] = ops;
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) {
+ rep->esw = esw;
+ rep_data = &rep->rep_data[rep_type];
+ atomic_set(&rep_data->state, REP_REGISTERED);
+ }
+ }
+}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
+
+void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ __unload_reps_all_vport(esw, rep_type);
+
+ mlx5_esw_for_each_rep(esw, i, rep)
+ atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
+}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
+
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ return rep->rep_data[rep_type].priv;
+}
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ u16 vport,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+
+ rep = mlx5_eswitch_get_rep(esw, vport);
+
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ esw->offloads.rep_ops[rep_type]->get_proto_dev)
+ return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ u16 vport)
+{
+ return mlx5_eswitch_get_rep(esw, vport);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
+
+bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
+{
+ return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED);
+}
+EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled);
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+ return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+
+u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport)))
+ return 0;
+
+ return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
+
+int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+ u16 vport_num, u32 controller, u32 sfnum)
+{
+ int err;
+
+ err = mlx5_esw_vport_enable(esw, vport_num, MLX5_VPORT_UC_ADDR_CHANGE);
+ if (err)
+ return err;
+
+ err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum);
+ if (err)
+ goto devlink_err;
+
+ mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
+ err = mlx5_esw_offloads_rep_load(esw, vport_num);
+ if (err)
+ goto rep_err;
+ return 0;
+
+rep_err:
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
+devlink_err:
+ mlx5_esw_vport_disable(esw, vport_num);
+ return err;
+}
+
+void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ mlx5_esw_offloads_rep_unload(esw, vport_num);
+ mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+ mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
+ mlx5_esw_vport_disable(esw, vport_num);
+}
+
+static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ *vhca_id = 0;
+ if (mlx5_esw_is_manager_vport(esw, vport_num) ||
+ !MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ return -EPERM;
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
+int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ u16 *old_entry, *vhca_map_entry, vhca_id;
+ int err;
+
+ err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ if (err) {
+ esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL);
+ if (!vhca_map_entry)
+ return -ENOMEM;
+
+ *vhca_map_entry = vport_num;
+ old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL);
+ if (xa_is_err(old_entry)) {
+ kfree(vhca_map_entry);
+ return xa_err(old_entry);
+ }
+ kfree(old_entry);
+ return 0;
+}
+
+void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ u16 *vhca_map_entry, vhca_id;
+ int err;
+
+ err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ if (err)
+ esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n",
+ vport_num, err);
+
+ vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id);
+ kfree(vhca_map_entry);
+}
+
+int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num)
+{
+ u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id);
+
+ if (!res)
+ return -ENOENT;
+
+ *vport_num = *res;
+ return 0;
+}
+
+u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport)))
+ return 0;
+
+ return vport->metadata;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set);
+
+static bool
+is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num == MLX5_VPORT_PF ||
+ mlx5_eswitch_is_vf_vport(esw, vport_num) ||
+ mlx5_esw_is_sf_vport(esw, vport_num);
+}
+
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
+ u8 *hw_addr, int *hw_addr_len,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num))
+ return -EOPNOTSUPP;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ ether_addr_copy(hw_addr, vport->info.mac);
+ *hw_addr_len = ETH_ALEN;
+ mutex_unlock(&esw->state_lock);
+ return 0;
+}
+
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const u8 *hw_addr, int hw_addr_len,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw)) {
+ NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
+ return PTR_ERR(esw);
+ }
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num)) {
+ NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr");
+ return -EINVAL;
+ }
+
+ return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
new file mode 100644
index 000000000..edd910258
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <linux/mlx5/fs.h>
+#include "eswitch.h"
+#include "en_tc.h"
+#include "fs_core.h"
+
+struct mlx5_termtbl_handle {
+ struct hlist_node termtbl_hlist;
+
+ struct mlx5_flow_table *termtbl;
+ struct mlx5_flow_act flow_act;
+ struct mlx5_flow_destination dest;
+
+ struct mlx5_flow_handle *rule;
+ int ref_count;
+};
+
+static u32
+mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest)
+{
+ u32 hash;
+
+ hash = jhash_1word(flow_act->action, 0);
+ hash = jhash((const void *)&flow_act->vlan,
+ sizeof(flow_act->vlan), hash);
+ hash = jhash((const void *)&dest->vport.num,
+ sizeof(dest->vport.num), hash);
+ hash = jhash((const void *)&dest->vport.vhca_id,
+ sizeof(dest->vport.num), hash);
+ if (flow_act->pkt_reformat)
+ hash = jhash(flow_act->pkt_reformat,
+ sizeof(*flow_act->pkt_reformat),
+ hash);
+ return hash;
+}
+
+static int
+mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1,
+ struct mlx5_flow_destination *dest1,
+ struct mlx5_flow_act *flow_act2,
+ struct mlx5_flow_destination *dest2)
+{
+ int ret;
+
+ ret = flow_act1->action != flow_act2->action ||
+ dest1->vport.num != dest2->vport.num ||
+ dest1->vport.vhca_id != dest2->vport.vhca_id ||
+ memcmp(&flow_act1->vlan, &flow_act2->vlan,
+ sizeof(flow_act1->vlan));
+ if (ret)
+ return ret;
+
+ if (flow_act1->pkt_reformat && flow_act2->pkt_reformat)
+ return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat,
+ sizeof(*flow_act1->pkt_reformat));
+
+ return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat);
+}
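+
+/* Note: like memcmp(), the comparison above returns 0 when the two
+ * action/destination pairs are identical; the hash lookup in
+ * mlx5_eswitch_termtbl_get_create() relies on that convention.
+ */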
+
+static int
+mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
+ struct mlx5_termtbl_handle *tt,
+ struct mlx5_flow_act *flow_act)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+ int err, err2;
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Since this is the terminating action, the termination table is at the
+ * same prio as the slow path
+ */
+ ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED |
+ MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft_attr.prio = FDB_TC_OFFLOAD;
+ ft_attr.max_fte = 1;
+ ft_attr.level = 1;
+ ft_attr.autogroup.max_num_groups = 1;
+ tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(tt->termtbl)) {
+ err = PTR_ERR(tt->termtbl);
+ esw_warn(dev, "Failed to create termination table, err %pe\n", tt->termtbl);
+ return err;
+ }
+
+ tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act,
+ &tt->dest, 1);
+ if (IS_ERR(tt->rule)) {
+ err = PTR_ERR(tt->rule);
+ esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule);
+ goto add_flow_err;
+ }
+ return 0;
+
+add_flow_err:
+ err2 = mlx5_destroy_flow_table(tt->termtbl);
+ if (err2)
+ esw_warn(dev, "Failed to destroy termination table, err %d\n", err2);
+
+ return err;
+}
+
+static struct mlx5_termtbl_handle *
+mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_termtbl_handle *tt;
+ bool found = false;
+ u32 hash_key;
+ int err;
+
+ mutex_lock(&esw->offloads.termtbl_mutex);
+ hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest);
+ hash_for_each_possible(esw->offloads.termtbl_tbl, tt,
+ termtbl_hlist, hash_key) {
+ if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest,
+ flow_act, dest)) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto tt_add_ref;
+
+ tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+ if (!tt) {
+ err = -ENOMEM;
+ goto tt_create_err;
+ }
+
+ tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ tt->dest.vport.num = dest->vport.num;
+ tt->dest.vport.vhca_id = dest->vport.vhca_id;
+ tt->dest.vport.flags = dest->vport.flags;
+ memcpy(&tt->flow_act, flow_act, sizeof(*flow_act));
+
+ err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act);
+ if (err)
+ goto tt_create_err;
+
+ hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key);
+tt_add_ref:
+ tt->ref_count++;
+ mutex_unlock(&esw->offloads.termtbl_mutex);
+ return tt;
+tt_create_err:
+ kfree(tt);
+ mutex_unlock(&esw->offloads.termtbl_mutex);
+ return ERR_PTR(err);
+}
+
+void
+mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
+ struct mlx5_termtbl_handle *tt)
+{
+ mutex_lock(&esw->offloads.termtbl_mutex);
+ if (--tt->ref_count == 0)
+ hash_del(&tt->termtbl_hlist);
+ mutex_unlock(&esw->offloads.termtbl_mutex);
+
+ if (!tt->ref_count) {
+ mlx5_del_flow_rules(tt->rule);
+ mlx5_destroy_flow_table(tt->termtbl);
+ kfree(tt);
+ }
+}
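+
+/* Termination tables are reference counted and shared:
+ * mlx5_eswitch_termtbl_get_create() reuses an existing table whose
+ * action/destination pair matches and only bumps ref_count; the rule and
+ * table are destroyed here once the last reference is dropped.
+ */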
+
+static void
+mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
+ struct mlx5_flow_act *dst)
+{
+ if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0]));
+ memset(&src->vlan[0], 0, sizeof(src->vlan[0]));
+
+ if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1]));
+ memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+ }
+ }
+}
+
+static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
+ const struct mlx5_flow_spec *spec)
+{
+ u16 port_mask, port_value;
+
+ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return spec->flow_context.flow_source ==
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
+ misc_parameters.source_port);
+ port_value = MLX5_GET(fte_match_param, spec->match_value,
+ misc_parameters.source_port);
+ return (port_mask & port_value) == MLX5_VPORT_UPLINK;
+}
+
+bool
+mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int i;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) ||
+ mlx5e_tc_attr_flags_skip(attr->flags) ||
+ (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port))
+ return false;
+
+ /* push vlan on RX */
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) &
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX))
+ return true;
+
+ /* hairpin */
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+ if (!esw_attr->dest_int_port && esw_attr->dests[i].rep &&
+ esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK)
+ return true;
+
+ return false;
+}
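+
+/* In short, a termination table is used only for traffic coming from the
+ * uplink (or an internal port) that either pushes a VLAN on RX without
+ * steering support for it, or is hairpinned back out of the uplink.
+ */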
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int num_dest)
+{
+ struct mlx5_flow_act term_tbl_act = {};
+ struct mlx5_flow_handle *rule = NULL;
+ bool term_table_created = false;
+ int num_vport_dests = 0;
+ int i, curr_dest;
+
+ mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act);
+ term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ for (i = 0; i < num_dest; i++) {
+ struct mlx5_termtbl_handle *tt;
+
+ /* only vport destinations can be terminated */
+ if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
+ continue;
+
+ if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) {
+ term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat;
+ } else {
+ term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ term_tbl_act.pkt_reformat = NULL;
+ }
+
+ /* get the terminating table for the action list */
+ tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
+ &dest[i], attr);
+ if (IS_ERR(tt)) {
+ esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt);
+ goto revert_changes;
+ }
+ attr->dests[num_vport_dests].termtbl = tt;
+ num_vport_dests++;
+
+ /* link the destination with the termination table */
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = tt->termtbl;
+ term_table_created = true;
+ }
+
+ /* at least one destination should reference a termination table */
+ if (!term_table_created)
+ goto revert_changes;
+
+ /* create the FTE */
+ flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = NULL;
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
+ if (IS_ERR(rule))
+ goto revert_changes;
+
+ goto out;
+
+revert_changes:
+ /* revert the changes that were made to the original flow_act
+ * and fall-back to the original rule actions
+ */
+ mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act);
+
+ for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) {
+ struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl;
+
+ attr->dests[curr_dest].termtbl = NULL;
+
+ /* search for the destination associated with the
+ * current term table
+ */
+ for (i = 0; i < num_dest; i++) {
+ if (dest[i].ft != tt->termtbl)
+ continue;
+
+ memset(&dest[i], 0, sizeof(dest[i]));
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = tt->dest.vport.num;
+ dest[i].vport.vhca_id = tt->dest.vport.vhca_id;
+ mlx5_eswitch_termtbl_put(esw, tt);
+ break;
+ }
+ }
+ rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
+out:
+ return rule;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644
index 000000000..9459e56ee
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+ struct mlx5_nb nb;
+ void *ctx;
+};
+
+/* General event handlers for the low-level mlx5_core driver.
+ *
+ * Other major feature-specific events, such as
+ * clock/eswitch/fpga/FW trace and many others, are handled elsewhere by
+ * separate notifier callbacks registered by those mlx5 components.
+ */
+static int any_notifier(struct notifier_block *, unsigned long, void *);
+static int temp_warn(struct notifier_block *, unsigned long, void *);
+static int port_module(struct notifier_block *, unsigned long, void *);
+static int pcie_core(struct notifier_block *, unsigned long, void *);
+
+/* handler which forwards the event to events->fw_nh, driver notifiers */
+static int forward_event(struct notifier_block *, unsigned long, void *);
+
+static struct mlx5_nb events_nbs_ref[] = {
+ /* Events to be processed by mlx5_core */
+ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+ {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+ {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+ {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+
+ /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE },
+ /* QP/WQ resource events to forward */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+ /* SRQ events */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
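+
+/* Each forward_event entry above simply relays the raw FW EQE to the
+ * events->fw_nh notifier chain, where listeners such as mlx5e/mlx5_ib
+ * registered on that chain pick it up.
+ */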
+
+struct mlx5_events {
+ struct mlx5_core_dev *dev;
+ struct workqueue_struct *wq;
+ struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
+ /* driver notifier chain for fw events */
+ struct atomic_notifier_head fw_nh;
+ /* port module events stats */
+ struct mlx5_pme_stats pme_stats;
+ /* pcie_core */
+ struct work_struct pcie_core_work;
+ /* driver notifier chain for sw events */
+ struct blocking_notifier_head sw_nh;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+ switch (type) {
+ case MLX5_EVENT_TYPE_COMP:
+ return "MLX5_EVENT_TYPE_COMP";
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ return "MLX5_EVENT_TYPE_PATH_MIG";
+ case MLX5_EVENT_TYPE_COMM_EST:
+ return "MLX5_EVENT_TYPE_COMM_EST";
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ return "MLX5_EVENT_TYPE_SQ_DRAINED";
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return "MLX5_EVENT_TYPE_CQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+ return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return "MLX5_EVENT_TYPE_PORT_CHANGE";
+ case MLX5_EVENT_TYPE_GPIO_EVENT:
+ return "MLX5_EVENT_TYPE_GPIO_EVENT";
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+ case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+ return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+ case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+ return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+ case MLX5_EVENT_TYPE_STALL_EVENT:
+ return "MLX5_EVENT_TYPE_STALL_EVENT";
+ case MLX5_EVENT_TYPE_CMD:
+ return "MLX5_EVENT_TYPE_CMD";
+ case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
+ return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
+ case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
+ return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+ case MLX5_EVENT_TYPE_PAGE_FAULT:
+ return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+ case MLX5_EVENT_TYPE_MONITOR_COUNTER:
+ return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ case MLX5_EVENT_TYPE_OBJECT_CHANGE:
+ return "MLX5_EVENT_TYPE_OBJECT_CHANGE";
+ default:
+ return "Unrecognized event";
+ }
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+ u64 value_lsb;
+ u64 value_msb;
+
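+ /* The 128-bit sensor warning bitmap is reported as two big-endian 64-bit words */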
+ value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+ mlx5_core_warn(events->dev,
+ "High temperature on sensors with bit set %llx %llx",
+ value_msb, value_lsb);
+
+ return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
+{
+ switch (status) {
+ case MLX5_MODULE_STATUS_PLUGGED:
+ return "Cable plugged";
+ case MLX5_MODULE_STATUS_UNPLUGGED:
+ return "Cable unplugged";
+ case MLX5_MODULE_STATUS_ERROR:
+ return "Cable error";
+ case MLX5_MODULE_STATUS_DISABLED:
+ return "Cable disabled";
+ default:
+ return "Unknown status";
+ }
+}
+
+static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
+{
+ switch (error) {
+ case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
+ return "Power budget exceeded";
+ case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
+ return "Long Range for non MLNX cable";
+ case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
+ return "Bus stuck (I2C or data shorted)";
+ case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
+ return "No EEPROM/retry timeout";
+ case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
+ return "Enforce part number list";
+ case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
+ return "Unknown identifier";
+ case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
+ return "High Temperature";
+ case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
+ return "Bad or shorted cable/module";
+ case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
+ return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ enum port_module_event_status_type module_status;
+ enum port_module_event_error_type error_type;
+ struct mlx5_eqe_port_module *module_event_eqe;
+ const char *status_str;
+ u8 module_num;
+
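+ /* Only the low bits of the EQE status/error fields carry the event codes */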
+ module_event_eqe = &eqe->data.port_module;
+ module_status = module_event_eqe->module_status &
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ error_type = module_event_eqe->error_type &
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+ if (module_status < MLX5_MODULE_STATUS_NUM)
+ events->pme_stats.status_counters[module_status]++;
+
+ if (module_status == MLX5_MODULE_STATUS_ERROR)
+ if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
+ events->pme_stats.error_counters[error_type]++;
+
+ if (!printk_ratelimit())
+ return NOTIFY_OK;
+
+ module_num = module_event_eqe->module;
+ status_str = mlx5_pme_status_to_string(module_status);
+ if (module_status == MLX5_MODULE_STATUS_ERROR) {
+ const char *error_str = mlx5_pme_error_to_string(error_type);
+
+ mlx5_core_err(events->dev,
+ "Port module event[error]: module %u, %s, %s\n",
+ module_num, status_str, error_str);
+ } else {
+ mlx5_core_info(events->dev,
+ "Port module event: module %u, %s\n",
+ module_num, status_str);
+ }
+
+ return NOTIFY_OK;
+}
+
+enum {
+ MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
+ MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
+ MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
+};
+
+static void mlx5_pcie_event(struct work_struct *work)
+{
+ u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+ struct mlx5_events *events;
+ struct mlx5_core_dev *dev;
+ u8 power_status;
+ u16 pci_power;
+
+ events = container_of(work, struct mlx5_events, pcie_core_work);
+ dev = events->dev;
+
+ if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
+ return;
+
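+ /* Query the MPEIN register for the PCIe slot power status and reported power */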
+ mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MPEIN, 0, 0);
+ power_status = MLX5_GET(mpein_reg, out, pwr_status);
+ pci_power = MLX5_GET(mpein_reg, out, pci_power);
+
+ switch (power_status) {
+ case MLX5_PCI_POWER_COULD_NOT_BE_READ:
+ mlx5_core_info_rl(dev,
+ "PCIe slot power capability was not advertised.\n");
+ break;
+ case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
+ mlx5_core_warn_rl(dev,
+ "Detected insufficient power on the PCIe slot (%uW).\n",
+ pci_power);
+ break;
+ case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
+ mlx5_core_info_rl(dev,
+ "PCIe slot advertised sufficient power (%uW).\n",
+ pci_power);
+ break;
+ }
+}
+
+static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb,
+ struct mlx5_event_nb,
+ nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
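+ /* The register query sleeps, so defer it from the EQ notifier to the events workqueue */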
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
+ queue_work(events->wq, &events->pcie_core_work);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+ *stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ atomic_notifier_call_chain(&events->fw_nh, event, data);
+ return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = kvzalloc(sizeof(*events), GFP_KERNEL);
+
+ if (!events)
+ return -ENOMEM;
+
+ ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
+ events->dev = dev;
+ dev->priv.events = events;
+ events->wq = create_singlethread_workqueue("mlx5_events");
+ if (!events->wq) {
+ kvfree(events);
+ return -ENOMEM;
+ }
+ INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
+ BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
+
+ return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+ destroy_workqueue(dev->priv.events->wq);
+ kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+ events->notifiers[i].nb = events_nbs_ref[i];
+ events->notifiers[i].ctx = events;
+ mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+ }
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
+ mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+ flush_workqueue(events->wq);
+}
+
+/* This API is used only for processing and forwarding firmware
+ * events to mlx5 consumers.
+ */
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_register(&events->fw_nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_unregister(&events->fw_nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+ return atomic_notifier_call_chain(&events->fw_nh, event, data);
+}
+
+/* This API is used only for processing and forwarding driver-specific
+ * events to mlx5 consumers.
+ */
+int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return blocking_notifier_chain_register(&events->sw_nh, nb);
+}
+
+int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return blocking_notifier_chain_unregister(&events->sw_nh, nb);
+}
+
+int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
+ void *data)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return blocking_notifier_call_chain(&events->sw_nh, event, data);
+}
+
+void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
+{
+ queue_work(dev->priv.events->wq, work);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
new file mode 100644
index 000000000..9a3707715
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "fpga/cmd.h"
+
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+ MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+ void *buf, bool write)
+{
+ u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+ u32 out[MLX5_FPGA_ACCESS_REG_SZ];
+ int err;
+
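+ /* The access register requires a dword-aligned size and address */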
+ if (size & 3)
+ return -EINVAL;
+ if (addr & 3)
+ return -EINVAL;
+ if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+ return -EINVAL;
+
+ MLX5_SET(fpga_access_reg, in, size, size);
+ MLX5_SET64(fpga_access_reg, in, address, addr);
+ if (write)
+ memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_ACCESS_REG, 0, write);
+ if (err)
+ return err;
+
+ if (!write)
+ memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size);
+
+ return 0;
+}
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga,
+ MLX5_ST_SZ_BYTES(fpga_cap),
+ MLX5_REG_FPGA_CAP, 0, 0);
+}
+
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+
+ MLX5_SET(fpga_ctrl, in, operation, op);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_CTRL, 0, true);
+}
+
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+ unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+ u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+ unsigned int read;
+ int ret = 0;
+
+ if (cap_size > size) {
+ mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u",
+ size, cap_size);
+ return -EINVAL;
+ }
+
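+ /* Read the extended capability area in chunks of at most one access-register payload */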
+ while (cap_size > 0) {
+ read = min_t(unsigned int, cap_size,
+ MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+ ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+ if (ret) {
+ mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d",
+ read, addr, ret);
+ return ret;
+ }
+
+ cap_size -= read;
+ addr += read;
+ caps += read;
+ }
+
+ return ret;
+}
+
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_CTRL, 0, false);
+ if (err)
+ return err;
+
+ query->status = MLX5_GET(fpga_ctrl, out, status);
+ query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin);
+ query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
+ return 0;
+}
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+ u32 *fpga_qpn)
+{
+ u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {};
+ int ret;
+
+ MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+ memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
+ MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+ ret = mlx5_cmd_exec_inout(dev, fpga_create_qp, in, out);
+ if (ret)
+ return ret;
+
+ memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc),
+ MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+ *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn);
+ return ret;
+}
+
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields,
+ void *fpga_qpc)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {};
+
+ MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+ MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
+ MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn);
+ memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
+ MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+ return mlx5_cmd_exec_in(dev, fpga_modify_qp, in);
+}
+
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+ u32 fpga_qpn, void *fpga_qpc)
+{
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {};
+ int ret;
+
+ MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+ MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
+
+ ret = mlx5_cmd_exec_inout(dev, fpga_query_qp, in, out);
+ if (ret)
+ return ret;
+
+ memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc),
+ MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc));
+ return ret;
+}
+
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {};
+
+ MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
+ MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
+
+ return mlx5_cmd_exec_in(dev, fpga_destroy_qp, in);
+}
+
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data)
+{
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {};
+ int ret;
+
+ MLX5_SET(fpga_query_qp_counters_in, in, opcode,
+ MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS);
+ MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
+ MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
+
+ ret = mlx5_cmd_exec_inout(dev, fpga_query_qp_counters, in, out);
+ if (ret)
+ return ret;
+
+ data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_ack_packets);
+ data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_send_packets);
+ data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_ack_packets);
+ data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_send_packets);
+ data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_total_drop);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
new file mode 100644
index 000000000..11621d265
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_H__
+#define __MLX5_FPGA_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_fpga_id {
+ MLX5_FPGA_NEWTON = 0,
+ MLX5_FPGA_EDISON = 1,
+ MLX5_FPGA_MORSE = 2,
+ MLX5_FPGA_MORSEQ = 3,
+};
+
+enum mlx5_fpga_image {
+ MLX5_FPGA_IMAGE_USER = 0,
+ MLX5_FPGA_IMAGE_FACTORY,
+};
+
+enum mlx5_fpga_status {
+ MLX5_FPGA_STATUS_SUCCESS = 0,
+ MLX5_FPGA_STATUS_FAILURE = 1,
+ MLX5_FPGA_STATUS_IN_PROGRESS = 2,
+ MLX5_FPGA_STATUS_NONE = 0xFFFF,
+};
+
+struct mlx5_fpga_query {
+ enum mlx5_fpga_image admin_image;
+ enum mlx5_fpga_image oper_image;
+ enum mlx5_fpga_status status;
+};
+
+enum mlx5_fpga_qpc_field_select {
+ MLX5_FPGA_QPC_STATE = BIT(0),
+};
+
+struct mlx5_fpga_qp_counters {
+ u64 rx_ack_packets;
+ u64 rx_send_packets;
+ u64 tx_ack_packets;
+ u64 tx_send_packets;
+ u64 rx_total_drop;
+};
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev);
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+ void *buf, bool write);
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size);
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+ u32 *fpga_qpn);
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc);
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc);
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data);
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn);
+
+#endif /* __MLX5_FPGA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
new file mode 100644
index 000000000..12abe9915
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -0,0 +1,1001 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <net/addrconf.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/vport.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/conn.h"
+
+#define MLX5_FPGA_PKEY 0xFFFF
+#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */
+#define MLX5_FPGA_RECV_SIZE 2048
+#define MLX5_FPGA_PORT_NUM 1
+#define MLX5_FPGA_CQ_BUDGET 64
+
+static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct device *dma_device;
+ int err = 0;
+
+ if (unlikely(!buf->sg[0].data))
+ goto out;
+
+ dma_device = mlx5_core_dma_dev(conn->fdev->mdev);
+ buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data,
+ buf->sg[0].size, buf->dma_dir);
+ err = dma_mapping_error(dma_device, buf->sg[0].dma_addr);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (!buf->sg[1].data)
+ goto out;
+
+ buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data,
+ buf->sg[1].size, buf->dma_dir);
+ err = dma_mapping_error(dma_device, buf->sg[1].dma_addr);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err);
+ dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+ buf->sg[0].size, buf->dma_dir);
+ err = -ENOMEM;
+ }
+
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct device *dma_device;
+
+ dma_device = mlx5_core_dma_dev(conn->fdev->mdev);
+ if (buf->sg[1].data)
+ dma_unmap_single(dma_device, buf->sg[1].dma_addr,
+ buf->sg[1].size, buf->dma_dir);
+
+ if (likely(buf->sg[0].data))
+ dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+ buf->sg[0].size, buf->dma_dir);
+}
+
+static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix;
+ int err = 0;
+
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (unlikely(err))
+ goto out;
+
+ if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ return -EBUSY;
+ }
+
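+ /* rq.size is a power of two, so masking the producer counter yields the WQE index */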
+ ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
+ data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
+ data->byte_count = cpu_to_be32(buf->sg[0].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
+ data->addr = cpu_to_be64(buf->sg[0].dma_addr);
+
+ conn->qp.rq.pc++;
+ conn->qp.rq.bufs[ix] = buf;
+
+ /* Make sure that descriptors are written before doorbell record. */
+ dma_wmb();
+ *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff);
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe)
+{
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+ *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
+ /* Make sure that doorbell record is visible before ringing */
+ wmb();
+ mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET);
+}
+
+static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix, sgi;
+ int size = 1;
+
+ ix = conn->qp.sq.pc & (conn->qp.sq.size - 1);
+
+ ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix);
+ data = (void *)(ctrl + 1);
+
+ for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) {
+ if (!buf->sg[sgi].data)
+ break;
+ data->byte_count = cpu_to_be32(buf->sg[sgi].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
+ data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
+ data++;
+ size++;
+ }
+
+ ctrl->imm = 0;
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
+ MLX5_OPCODE_SEND);
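+ /* The DS count covers the ctrl segment plus one data segment per SG entry */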
+ ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.qpn << 8));
+
+ conn->qp.sq.pc++;
+ conn->qp.sq.bufs[ix] = buf;
+ mlx5_fpga_conn_notify_hw(conn, ctrl);
+}
+
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ unsigned long flags;
+ int err;
+
+ if (!conn->qp.active)
+ return -ENOTCONN;
+
+ buf->dma_dir = DMA_TO_DEVICE;
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (err)
+ return err;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
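+ /* If the SQ is full, queue the buffer on the backlog; it is posted later from the CQE handler */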
+ if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) {
+ list_add_tail(&buf->list, &conn->qp.sq.backlog);
+ goto out_unlock;
+ }
+
+ mlx5_fpga_conn_post_send(conn, buf);
+
+out_unlock:
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+ return err;
+}
+
+static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int err;
+
+ buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->sg[0].data = (void *)(buf + 1);
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ buf->dma_dir = DMA_FROM_DEVICE;
+
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (err)
+ kfree(buf);
+
+ return err;
+}
+
+static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ u32 *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
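+ /* Physical-address mkey with local read/write, spanning the whole address space */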
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int ix, err;
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1);
+ buf = conn->qp.rq.bufs[ix];
+ conn->qp.rq.bufs[ix] = NULL;
+ conn->qp.rq.cc++;
+
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (unlikely(status || !conn->qp.active)) {
+ conn->qp.active = false;
+ kfree(buf);
+ return;
+ }
+
+ buf->sg[0].size = be32_to_cpu(cqe->byte_cnt);
+ mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n",
+ buf->sg[0].size);
+ conn->recv_cb(conn->cb_arg, buf);
+
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev,
+ "Failed to re-post recv buf: %d\n", err);
+ kfree(buf);
+ }
+}
+
+static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf, *nextbuf;
+ unsigned long flags;
+ int ix;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1);
+ buf = conn->qp.sq.bufs[ix];
+ conn->qp.sq.bufs[ix] = NULL;
+ conn->qp.sq.cc++;
+
+ /* Handle backlog still under the spinlock to ensure message post order */
+ if (unlikely(!list_empty(&conn->qp.sq.backlog))) {
+ if (likely(conn->qp.active)) {
+ nextbuf = list_first_entry(&conn->qp.sq.backlog,
+ struct mlx5_fpga_dma_buf, list);
+ list_del(&nextbuf->list);
+ mlx5_fpga_conn_post_send(conn, nextbuf);
+ }
+ }
+
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (likely(buf->complete))
+ buf->complete(conn, conn->fdev, buf, status);
+
+ if (unlikely(status))
+ conn->qp.active = false;
+}
+
+static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe)
+{
+ u8 opcode, status = 0;
+
+ opcode = get_cqe_opcode(cqe);
+
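+ /* Error CQEs carry a syndrome and fall through to the matching SQ/RQ handler */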
+ switch (opcode) {
+ case MLX5_CQE_REQ_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ fallthrough;
+ case MLX5_CQE_REQ:
+ mlx5_fpga_conn_sq_cqe(conn, cqe, status);
+ break;
+
+ case MLX5_CQE_RESP_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ fallthrough;
+ case MLX5_CQE_RESP_SEND:
+ mlx5_fpga_conn_rq_cqe(conn, cqe, status);
+ break;
+ default:
+ mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n",
+ opcode);
+ }
+}
+
+static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
+{
+ mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT,
+ conn->fdev->conn_res.uar->map, conn->cq.wq.cc);
+}
+
+static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
+ unsigned int budget)
+{
+ struct mlx5_cqe64 *cqe;
+
+ while (budget) {
+ cqe = mlx5_cqwq_get_cqe(&conn->cq.wq);
+ if (!cqe)
+ break;
+
+ budget--;
+ mlx5_cqwq_pop(&conn->cq.wq);
+ mlx5_fpga_conn_handle_cqe(conn, cqe);
+ mlx5_cqwq_update_db_record(&conn->cq.wq);
+ }
+ if (!budget) {
+ tasklet_schedule(&conn->cq.tasklet);
+ return;
+ }
+
+ mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc);
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+ mlx5_fpga_conn_arm_cq(conn);
+}
+
+static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t)
+{
+ struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet);
+
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq,
+ struct mlx5_eqe *eqe)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_wq_param wqp;
+ struct mlx5_cqe64 *cqe;
+ int inlen, err, eqn;
+ void *cqc, *in;
+ __be64 *pas;
+ u32 i;
+
+ cq_size = roundup_pow_of_two(cq_size);
+ MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size));
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq,
+ &conn->cq.wq_ctrl);
+ if (err)
+ return err;
+
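+ /* Mark every CQE as invalid and hardware-owned so stale entries are never consumed */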
+ for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) {
+ cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i);
+ cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * conn->cq.wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_cqwq;
+ }
+
+ err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn);
+ if (err) {
+ kvfree(in);
+ goto err_cqwq;
+ }
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+ mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
+
+ err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out));
+ kvfree(in);
+
+ if (err)
+ goto err_cqwq;
+
+ conn->cq.mcq.cqe_sz = 64;
+ conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db;
+ conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1;
+ *conn->cq.mcq.set_ci_db = 0;
+ *conn->cq.mcq.arm_db = 0;
+ conn->cq.mcq.vector = 0;
+ conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
+ conn->cq.mcq.uar = fdev->conn_res.uar;
+ tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet);
+
+ mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
+
+ goto out;
+
+err_cqwq:
+ mlx5_wq_destroy(&conn->cq.wq_ctrl);
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
+{
+ tasklet_disable(&conn->cq.tasklet);
+ tasklet_kill(&conn->cq.tasklet);
+ mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
+ mlx5_wq_destroy(&conn->cq.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ struct mlx5_wq_param wqp;
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq,
+ &conn->qp.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
+ unsigned int tx_size, unsigned int rx_size)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
+ void *in = NULL, *qpc;
+ int err, inlen;
+
+ conn->qp.rq.pc = 0;
+ conn->qp.rq.cc = 0;
+ conn->qp.rq.size = roundup_pow_of_two(rx_size);
+ conn->qp.sq.pc = 0;
+ conn->qp.sq.cc = 0;
+ conn->qp.sq.size = roundup_pow_of_two(tx_size);
+
+ MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ err = mlx5_fpga_conn_create_wq(conn, temp_qpc);
+ if (err)
+ goto out;
+
+ conn->qp.rq.bufs = kvcalloc(conn->qp.rq.size,
+ sizeof(conn->qp.rq.bufs[0]),
+ GFP_KERNEL);
+ if (!conn->qp.rq.bufs) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ conn->qp.sq.bufs = kvcalloc(conn->qp.sq.size,
+ sizeof(conn->qp.sq.bufs[0]),
+ GFP_KERNEL);
+ if (!conn->qp.sq.bufs) {
+ err = -ENOMEM;
+ goto err_rq_bufs;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+ conn->qp.wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_sq_bufs;
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index);
+ MLX5_SET(qpc, qpc, log_page_size,
+ conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, fre, 1);
+ MLX5_SET(qpc, qpc, rlky, 1);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn);
+ MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
+ MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+ MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+
+ mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+ goto err_sq_bufs;
+
+ conn->qp.qpn = MLX5_GET(create_qp_out, out, qpn);
+ mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.qpn);
+
+ goto out;
+
+err_sq_bufs:
+ kvfree(conn->qp.sq.bufs);
+err_rq_bufs:
+ kvfree(conn->qp.rq.bufs);
+err_wq:
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+out:
+ kvfree(in);
+ return err;
+}
+
+static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn)
+{
+ int ix;
+
+ for (ix = 0; ix < conn->qp.rq.size; ix++) {
+ if (!conn->qp.rq.bufs[ix])
+ continue;
+ mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]);
+ kfree(conn->qp.rq.bufs[ix]);
+ conn->qp.rq.bufs[ix] = NULL;
+ }
+}
+
+static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf, *temp;
+ int ix;
+
+ for (ix = 0; ix < conn->qp.sq.size; ix++) {
+ buf = conn->qp.sq.bufs[ix];
+ if (!buf)
+ continue;
+ conn->qp.sq.bufs[ix] = NULL;
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+ list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+}
+
+static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_core_dev *dev = conn->fdev->mdev;
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, conn->qp.qpn);
+ mlx5_cmd_exec_in(dev, destroy_qp, in);
+
+ mlx5_fpga_conn_free_recv_bufs(conn);
+ mlx5_fpga_conn_flush_send_bufs(conn);
+ kvfree(conn->qp.sq.bufs);
+ kvfree(conn->qp.rq.bufs);
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_core_dev *mdev = conn->fdev->mdev;
+ u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.qpn);
+
+ MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
+ MLX5_SET(qp_2rst_in, in, qpn, conn->qp.qpn);
+
+ return mlx5_cmd_exec_in(mdev, qp_2rst, in);
+}
+
+static int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+{
+ u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc;
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.qpn);
+
+ qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
+ MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn);
+ MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+
+ MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+ MLX5_SET(rst2init_qp_in, in, qpn, conn->qp.qpn);
+
+ return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
+}
+
+static int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+{
+ u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc;
+
+ mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
+
+ qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
+
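+ /* The remote side (QPN, PSN, MAC and IP) is taken from the FPGA QP context */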
+ MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
+ MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
+ MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn);
+ MLX5_SET(qpc, qpc, next_rcv_psn,
+ MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
+ ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+ MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
+ MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+ MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port));
+ MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+ conn->qp.sgid_index);
+ MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0);
+ memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+ MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
+ MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
+
+ MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+ MLX5_SET(init2rtr_qp_in, in, qpn, conn->qp.qpn);
+
+ return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
+}
+
+static int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc;
+
+ mlx5_fpga_dbg(conn->fdev, "QP RTS\n");
+
+ qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
+ MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
+ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */
+ MLX5_SET(qpc, qpc, next_send_psn,
+ MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn));
+ MLX5_SET(qpc, qpc, retry_count, 7);
+ MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+
+ MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+ MLX5_SET(rtr2rts_qp_in, in, qpn, conn->qp.qpn);
+ MLX5_SET(rtr2rts_qp_in, in, opt_param_mask, MLX5_QP_OPTPAR_RNR_TIMEOUT);
+
+ return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
+}
+
+static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ int err;
+
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE);
+ err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err);
+ goto out;
+ }
+
+ err = mlx5_fpga_conn_reset_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state to reset\n");
+ goto err_fpga_qp;
+ }
+
+ err = mlx5_fpga_conn_init_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n");
+ goto err_fpga_qp;
+ }
+ conn->qp.active = true;
+
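+ /* Fill the receive queue with buffers before moving the QP to RTR */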
+ while (!mlx5_fpga_conn_post_recv_buf(conn))
+ ;
+
+ err = mlx5_fpga_conn_rtr_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n");
+ goto err_recv_bufs;
+ }
+
+ err = mlx5_fpga_conn_rts_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n");
+ goto err_recv_bufs;
+ }
+ goto out;
+
+err_recv_bufs:
+ mlx5_fpga_conn_free_recv_bufs(conn);
+err_fpga_qp:
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc))
+ mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n");
+out:
+ return err;
+}
+
+struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type)
+{
+ struct mlx5_fpga_conn *ret, *conn;
+ u8 *remote_mac, *remote_ip;
+ int err;
+
+ if (!attr->recv_cb)
+ return ERR_PTR(-EINVAL);
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
+ conn->fdev = fdev;
+ INIT_LIST_HEAD(&conn->qp.sq.backlog);
+
+ spin_lock_init(&conn->qp.sq.lock);
+
+ conn->recv_cb = attr->recv_cb;
+ conn->cb_arg = attr->cb_arg;
+
+ remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
+ err = mlx5_query_mac_address(fdev->mdev, remote_mac);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ /* Build Modified EUI-64 IPv6 address from the MAC address */
+ remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip);
+ remote_ip[0] = 0xfe;
+ remote_ip[1] = 0x80;
+ addrconf_addr_eui48(&remote_ip[8], remote_mac);
+
+ err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
+ MLX5_ROCE_VERSION_2,
+ MLX5_ROCE_L3_TYPE_IPV6,
+ remote_ip, remote_mac, true, 0,
+ MLX5_FPGA_PORT_NUM);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_rsvd_gid;
+ }
+ mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index);
+
+ /* Allow one CQE per RX/TX WQE, plus one CQE for the next WQE that is
+ * posted while processing a completion.
+ */
+ err = mlx5_fpga_conn_create_cq(conn,
+ (attr->tx_size + attr->rx_size) * 2);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_gid;
+ }
+
+ mlx5_fpga_conn_arm_cq(conn);
+
+ err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_cq;
+ }
+
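+ /* Describe the host side of the RC connection in the FPGA QP context;
+ * the host QP acts as the FPGA's remote QP.
+ */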
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.qpn);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
+
+ err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc,
+ &conn->fpga_qpn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_qp;
+ }
+
+ err = mlx5_fpga_conn_connect(conn);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_conn;
+ }
+
+ mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn);
+ ret = conn;
+ goto out;
+
+err_conn:
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+err_qp:
+ mlx5_fpga_conn_destroy_qp(conn);
+err_cq:
+ mlx5_fpga_conn_destroy_cq(conn);
+err_gid:
+ mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
+ NULL, false, 0, MLX5_FPGA_PORT_NUM);
+err_rsvd_gid:
+ mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
+err:
+ kfree(conn);
+out:
+ return ret;
+}
+
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ conn->qp.active = false;
+ tasklet_disable(&conn->cq.tasklet);
+ synchronize_irq(conn->cq.mcq.irqn);
+
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+ mlx5_fpga_conn_destroy_qp(conn);
+ mlx5_fpga_conn_destroy_cq(conn);
+
+ mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0,
+ NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM);
+ mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index);
+ kfree(conn);
+}
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
+{
+ int err;
+
+ err = mlx5_nic_vport_enable_roce(fdev->mdev);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err);
+ goto out;
+ }
+
+ fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev);
+ if (IS_ERR(fdev->conn_res.uar)) {
+ err = PTR_ERR(fdev->conn_res.uar);
+ mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err);
+ goto err_roce;
+ }
+ mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n",
+ fdev->conn_res.uar->index);
+
+ err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn);
+ if (err) {
+ mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err);
+ goto err_uar;
+ }
+ mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn);
+
+ err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn,
+ &fdev->conn_res.mkey);
+ if (err) {
+ mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
+ goto err_dealloc_pd;
+ }
+ mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey);
+
+ return 0;
+
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+err_uar:
+ mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+err_roce:
+ mlx5_nic_vport_disable_roce(fdev->mdev);
+out:
+ return err;
+}
+
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
+{
+ mlx5_core_destroy_mkey(fdev->mdev, fdev->conn_res.mkey);
+ mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+ mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+ mlx5_nic_vport_disable_roce(fdev->mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
new file mode 100644
index 000000000..5116e869a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_CONN_H__
+#define __MLX5_FPGA_CONN_H__
+
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+#include "fpga/core.h"
+#include "fpga/sdk.h"
+#include "wq.h"
+
+struct mlx5_fpga_conn {
+ struct mlx5_fpga_device *fdev;
+
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ void *cb_arg;
+
+ /* FPGA QP */
+ u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)];
+ u32 fpga_qpn;
+
+ /* CQ */
+ struct {
+ struct mlx5_cqwq wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_cq mcq;
+ struct tasklet_struct tasklet;
+ } cq;
+
+ /* QP */
+ struct {
+ bool active;
+ int sgid_index;
+ struct mlx5_wq_qp wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ u32 qpn;
+ struct {
+ spinlock_t lock; /* Protects all SQ state */
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ struct list_head backlog;
+ } sq;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ } rq;
+ } qp;
+};
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev);
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev);
+struct mlx5_fpga_conn *
+mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type);
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn);
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+#endif /* __MLX5_FPGA_CONN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
new file mode 100644
index 000000000..39c03dcbd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "lib/eq.h"
+#include "fpga/core.h"
+#include "fpga/conn.h"
+
+static const char *const mlx5_fpga_error_strings[] = {
+ "Null Syndrome",
+ "Corrupted DDR",
+ "Flash Timeout",
+ "Internal Link Error",
+ "Watchdog HW Failure",
+ "I2C Failure",
+ "Image Changed",
+ "Temperature Critical",
+};
+
+static const char * const mlx5_fpga_qp_error_strings[] = {
+ "Null Syndrome",
+ "Retry Counter Expired",
+ "RNR Expired",
+};
+static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
+{
+ struct mlx5_fpga_device *fdev = NULL;
+
+ fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
+ if (!fdev)
+ return NULL;
+
+ spin_lock_init(&fdev->state_lock);
+ fdev->state = MLX5_FPGA_STATUS_NONE;
+ return fdev;
+}
+
+static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
+{
+ switch (image) {
+ case MLX5_FPGA_IMAGE_USER:
+ return "user";
+ case MLX5_FPGA_IMAGE_FACTORY:
+ return "factory";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *mlx5_fpga_name(u32 fpga_id)
+{
+ static char ret[32];
+
+ switch (fpga_id) {
+ case MLX5_FPGA_NEWTON:
+ return "Newton";
+ case MLX5_FPGA_EDISON:
+ return "Edison";
+ case MLX5_FPGA_MORSE:
+ return "Morse";
+ case MLX5_FPGA_MORSEQ:
+ return "MorseQ";
+ }
+
+ snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
+ return ret;
+}
+
+static int mlx5_is_fpga_lookaside(u32 fpga_id)
+{
+ return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
+}
+
+static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
+{
+ struct mlx5_fpga_query query;
+ int err;
+
+ err = mlx5_fpga_query(fdev->mdev, &query);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
+ return err;
+ }
+
+ fdev->last_admin_image = query.admin_image;
+ fdev->last_oper_image = query.oper_image;
+
+ mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
+ query.status, query.admin_image, query.oper_image);
+
+ /* for FPGA lookaside projects FPGA load status is not important */
+ if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
+ return 0;
+
+ if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
+ mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
+ mlx5_fpga_image_name(fdev->last_oper_image),
+ query.status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+ int err;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
+ return err;
+ }
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
+ return err;
+ }
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
+ return err;
+ }
+ return 0;
+}
+
+static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
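
The two handlers above only recover the owning mlx5_fpga_device from the embedded notifier and forward to mlx5_fpga_event(); mlx5_nb_cof (from lib/eq.h) is a container_of-based helper for exactly that, applied to the struct mlx5_nb wrapper. Below is a stripped-down, self-contained illustration of the same idiom with purely illustrative names, not the driver's own code.

#include <linux/kernel.h>
#include <linux/notifier.h>

struct demo_device {
	int id;
	struct notifier_block err_nb;	/* embedded notifier */
};

static int demo_err_event(struct notifier_block *nb, unsigned long event,
			  void *data)
{
	/* Map the embedded member back to its containing structure. */
	struct demo_device *dev = container_of(nb, struct demo_device, err_nb);

	pr_info("demo device %d: event %lu\n", dev->id, event);
	return NOTIFY_OK;
}
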
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int max_num_qps;
+ unsigned long flags;
+ u32 fpga_id;
+ int err;
+
+ if (!fdev)
+ return 0;
+
+ err = mlx5_fpga_caps(fdev->mdev);
+ if (err)
+ goto out;
+
+ err = mlx5_fpga_device_load_check(fdev);
+ if (err)
+ goto out;
+
+ fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
+ mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
+
+ /* No QPs if FPGA does not participate in net processing */
+ if (mlx5_is_fpga_lookaside(fpga_id))
+ goto out;
+
+ mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
+ mlx5_fpga_image_name(fdev->last_oper_image),
+ fdev->last_oper_image,
+ MLX5_CAP_FPGA(fdev->mdev, image_version),
+ MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
+ MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
+ MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
+
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ if (!max_num_qps) {
+ mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
+ err = -ENOTSUPP;
+ goto out;
+ }
+
+ err = mlx5_core_reserve_gids(mdev, max_num_qps);
+ if (err)
+ goto out;
+
+ MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+ MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
+ err = mlx5_fpga_conn_device_init(fdev);
+ if (err)
+ goto err_rsvd_gid;
+
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_device_brb(fdev);
+ if (err)
+ goto err_conn_init;
+ }
+
+ goto out;
+
+err_conn_init:
+ mlx5_fpga_conn_device_cleanup(fdev);
+
+err_rsvd_gid:
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
+out:
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ return err;
+}
+
+int mlx5_fpga_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = NULL;
+
+ if (!MLX5_CAP_GEN(mdev, fpga)) {
+ mlx5_core_dbg(mdev, "FPGA capability not present\n");
+ return 0;
+ }
+
+ mlx5_core_dbg(mdev, "Initializing FPGA\n");
+
+ fdev = mlx5_fpga_device_alloc();
+ if (!fdev)
+ return -ENOMEM;
+
+ fdev->mdev = mdev;
+ mdev->fpga = fdev;
+
+ return 0;
+}
+
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int max_num_qps;
+ unsigned long flags;
+ int err;
+
+ if (!fdev)
+ return;
+
+ if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
+ return;
+
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ return;
+ }
+ fdev->state = MLX5_FPGA_STATUS_NONE;
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+ if (err)
+ mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
+ err);
+ }
+
+ mlx5_fpga_conn_device_cleanup(fdev);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ mlx5_fpga_device_stop(mdev);
+ kfree(fdev);
+ mdev->fpga = NULL;
+}
+
+static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
+{
+ if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
+ return mlx5_fpga_error_strings[syndrome];
+ return "Unknown";
+}
+
+static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
+{
+ if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
+ return mlx5_fpga_qp_error_strings[syndrome];
+ return "Unknown";
+}
+
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+ unsigned long event, void *eqe)
+{
+ void *data = ((struct mlx5_eqe *)eqe)->data.raw;
+ const char *event_name;
+ bool teardown = false;
+ unsigned long flags;
+ u8 syndrome;
+
+ switch (event) {
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ syndrome = MLX5_GET(fpga_error_event, data, syndrome);
+ event_name = mlx5_fpga_syndrome_to_string(syndrome);
+ break;
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
+ event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ switch (fdev->state) {
+ case MLX5_FPGA_STATUS_SUCCESS:
+ mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
+ teardown = true;
+ break;
+ default:
+ mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
+ syndrome, event_name);
+ }
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+	/* We tear down the card's interfaces and functionality because
+	 * the FPGA bump-on-the-wire is misbehaving and we lose the ability
+	 * to communicate with the network. The user may still be able to
+	 * recover by re-programming or debugging the FPGA.
+	 */
+ if (teardown)
+ mlx5_trigger_health_work(fdev->mdev);
+
+ return NOTIFY_OK;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
new file mode 100644
index 000000000..750c32050
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_CORE_H__
+#define __MLX5_FPGA_CORE_H__
+
+#ifdef CONFIG_MLX5_FPGA
+
+#include <linux/mlx5/eq.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "fpga/cmd.h"
+
+/* Represents an Innova device */
+struct mlx5_fpga_device {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_nb fpga_err_nb;
+ struct mlx5_nb fpga_qp_err_nb;
+ spinlock_t state_lock; /* Protects state transitions */
+ enum mlx5_fpga_status state;
+ enum mlx5_fpga_image last_admin_image;
+ enum mlx5_fpga_image last_oper_image;
+
+ /* QP Connection resources */
+ struct {
+ u32 pdn;
+ u32 mkey;
+ struct mlx5_uars_page *uar;
+ } conn_res;
+};
+
+#define mlx5_fpga_dbg(__adev, format, ...) \
+ mlx5_core_dbg((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_err(__adev, format, ...) \
+ mlx5_core_err((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn(__adev, format, ...) \
+ mlx5_core_warn((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \
+ mlx5_core_err_rl((__adev)->mdev, "FPGA: %s:%d: " \
+ format, __func__, __LINE__, ##__VA_ARGS__)
+
+#define mlx5_fpga_notice(__adev, format, ...) \
+ mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
+
+#define mlx5_fpga_info(__adev, format, ...) \
+ mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
+
+int mlx5_fpga_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif
+
+#endif /* __MLX5_FPGA_CORE_H__ */
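
When CONFIG_MLX5_FPGA is disabled, the header above degrades to empty static inline stubs, so the rest of the mlx5 core can call the FPGA hooks unconditionally. The function below is only an illustration of that pattern (the real call sites are in the mlx5 core init/teardown paths); it relies solely on behaviour visible in this patch: mlx5_fpga_init() returns 0 when the FPGA capability is absent, and mlx5_fpga_device_start() is a no-op without an allocated device.

#include <linux/mlx5/driver.h>
#include "fpga/core.h"

/* Illustrative caller only: with CONFIG_MLX5_FPGA=n the calls below
 * compile to no-ops via the static inline stubs.
 */
static int demo_probe_fpga(struct mlx5_core_dev *mdev)
{
	int err;

	err = mlx5_fpga_init(mdev);
	if (err)
		return err;

	err = mlx5_fpga_device_start(mdev);
	if (err)
		mlx5_fpga_cleanup(mdev);

	return err;
}
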
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
new file mode 100644
index 000000000..14962969c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "fpga/core.h"
+#include "fpga/conn.h"
+#include "fpga/sdk.h"
+
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr)
+{
+ return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create);
+
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ mlx5_fpga_conn_destroy(conn);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy);
+
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ return mlx5_fpga_conn_send(conn, buf);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg);
+
+static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
+ u64 addr, u8 *buf)
+{
+ size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+ size_t bytes_done = 0;
+ u8 actual_size;
+ int err;
+
+ if (!size)
+ return -EINVAL;
+
+ if (!fdev->mdev)
+ return -ENOTCONN;
+
+ while (bytes_done < size) {
+ actual_size = min(max_size, (size - bytes_done));
+
+ err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+ addr + bytes_done,
+ buf + bytes_done, false);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n",
+ err);
+ break;
+ }
+
+ bytes_done += actual_size;
+ }
+
+ return err;
+}
+
+static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
+ u64 addr, u8 *buf)
+{
+ size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+ size_t bytes_done = 0;
+ u8 actual_size;
+ int err;
+
+ if (!size)
+ return -EINVAL;
+
+ if (!fdev->mdev)
+ return -ENOTCONN;
+
+ while (bytes_done < size) {
+ actual_size = min(max_size, (size - bytes_done));
+
+ err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+ addr + bytes_done,
+ buf + bytes_done, true);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n");
+ break;
+ }
+
+ bytes_done += actual_size;
+ }
+
+ return err;
+}
+
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_read);
+
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_write);
+
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf)
+{
+ return mlx5_fpga_sbu_caps(fdev->mdev, buf, size);
+}
+EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
new file mode 100644
index 000000000..89ef59265
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MLX5_FPGA_SDK_H
+#define MLX5_FPGA_SDK_H
+
+#include <linux/types.h>
+#include <linux/dma-direction.h>
+
+/**
+ * DOC: Innova SDK
+ * This header defines the in-kernel API for Innova FPGA client drivers.
+ */
+#define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000)
+
+/**
+ * enum mlx5_fpga_access_type - Enumerates the possible methods for
+ * accessing the device memory address space
+ *
+ * @MLX5_FPGA_ACCESS_TYPE_I2C: Use the slow CX-FPGA I2C bus
+ * @MLX5_FPGA_ACCESS_TYPE_DONTCARE: Use the fastest available method
+ */
+enum mlx5_fpga_access_type {
+ MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
+ MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
+};
+
+struct mlx5_fpga_conn;
+struct mlx5_fpga_device;
+
+/**
+ * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry
+ */
+struct mlx5_fpga_dma_entry {
+ /** @data: Virtual address pointer to the data */
+ void *data;
+ /** @size: Size in bytes of the data */
+ unsigned int size;
+ /** @dma_addr: Private member. Physical DMA-mapped address of the data */
+ dma_addr_t dma_addr;
+};
+
+/**
+ * struct mlx5_fpga_dma_buf - A packet buffer
+ * May contain up to 2 scatter-gather data entries
+ */
+struct mlx5_fpga_dma_buf {
+ /** @dma_dir: DMA direction */
+ enum dma_data_direction dma_dir;
+ /** @sg: Scatter-gather entries pointing to the data in memory */
+ struct mlx5_fpga_dma_entry sg[2];
+ /** @list: Item in SQ backlog, for TX packets */
+ struct list_head list;
+ /**
+ * @complete: Completion routine, for TX packets
+ * @conn: FPGA Connection this packet was sent to
+ * @fdev: FPGA device this packet was sent to
+ * @buf: The packet buffer
+ * @status: 0 if successful, or an error code otherwise
+ */
+ void (*complete)(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf, u8 status);
+};
+
+/**
+ * struct mlx5_fpga_conn_attr - FPGA connection attributes
+ * Describes the attributes of a connection
+ */
+struct mlx5_fpga_conn_attr {
+ /** @tx_size: Size of connection TX queue, in packets */
+ unsigned int tx_size;
+ /** @rx_size: Size of connection RX queue, in packets */
+ unsigned int rx_size;
+ /**
+ * @recv_cb: Callback function which is called for received packets
+ * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg
+ * @buf: A buffer containing a received packet
+ *
+ * buf is guaranteed to only contain a single scatter-gather entry.
+ * The size of the actual packet received is specified in buf.sg[0].size
+ * When this callback returns, the packet buffer may be re-used for
+ * subsequent receives.
+ */
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ /** @cb_arg: A context to be passed to recv_cb callback */
+ void *cb_arg;
+};
+
+/**
+ * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection
+ * @fdev: The FPGA device
+ * @attr: Attributes of the new connection
+ *
+ * Sets up a new FPGA SBU connection with the specified attributes.
+ * The receive callback function may be called for incoming messages even
+ * before this function returns.
+ *
+ * The caller must eventually destroy the connection by calling
+ * mlx5_fpga_sbu_conn_destroy.
+ *
+ * Return: A new connection, or ERR_PTR() error value otherwise.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr);
+
+/**
+ * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection
+ * @conn: The FPGA SBU connection to destroy
+ *
+ * Cleans up an FPGA SBU connection which was previously created with
+ * mlx5_fpga_sbu_conn_create.
+ */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
+
+/**
+ * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
+ * @conn: An FPGA SBU connection
+ * @buf: The packet buffer
+ *
+ * Queues a packet for transmission over an FPGA SBU connection.
+ * The buffer should not be modified or freed until completion.
+ * Upon completion, the buf's complete() callback is invoked, indicating the
+ * success or error status of the transmission.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+/**
+ * mlx5_fpga_mem_read() - Read from FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to read, in bytes
+ * @addr: Starting address to read from, in FPGA address space
+ * @buf: Buffer to read into
+ * @access_type: Method for reading
+ *
+ * Reads from the specified address into the specified buffer.
+ * The address may point to configuration space or to DDR.
+ * Large reads may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_mem_write() - Write to FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to write, in bytes
+ * @addr: Starting address to write to, in FPGA address space
+ * @buf: Buffer which contains data to write
+ * @access_type: Method for writing
+ *
+ * Writes the specified buffer data to FPGA memory at the specified address.
+ * The address may point to configuration space or to DDR.
+ * Large writes may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities
+ * @fdev: The FPGA device
+ * @size: Size of the buffer to read into
+ * @buf: Buffer to read the capabilities into
+ *
+ * Reads the FPGA SBU capabilities into the specified buffer.
+ * The format of the capabilities buffer is SBU-dependent.
+ *
+ * Return: 0 if successful
+ * -EINVAL if the buffer is not large enough to contain SBU caps
+ * or any other error value otherwise.
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf);
+
+#endif /* MLX5_FPGA_SDK_H */
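
Taken together, the declarations above form the whole client-facing surface: create an SBU connection with a receive callback, queue DMA buffers whose complete() callback fires asynchronously, and read or write FPGA memory. The sketch below strings those calls together for a hypothetical client; the demo_* names, the 16-byte payload and the error handling are assumptions, while the SDK calls and structure fields are the ones documented in this header.

#include <linux/err.h>
#include <linux/slab.h>
#include "fpga/sdk.h"

static void demo_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
{
	/* buf->sg[0] holds one received packet; the buffer may be
	 * reused by the SDK as soon as this callback returns.
	 */
}

static void demo_complete(struct mlx5_fpga_conn *conn,
			  struct mlx5_fpga_device *fdev,
			  struct mlx5_fpga_dma_buf *buf, u8 status)
{
	/* The TX buffer is ours again; non-zero status means the send
	 * failed.
	 */
	kfree(buf);
}

static int demo_client_start(struct mlx5_fpga_device *fdev)
{
	struct mlx5_fpga_conn_attr attr = {
		.tx_size = SBU_QP_QUEUE_SIZE,
		.rx_size = SBU_QP_QUEUE_SIZE,
		.recv_cb = demo_recv,
		.cb_arg  = NULL,
	};
	struct mlx5_fpga_dma_buf *buf;
	struct mlx5_fpga_conn *conn;
	int err;

	conn = mlx5_fpga_sbu_conn_create(fdev, &attr);
	if (IS_ERR(conn))
		return PTR_ERR(conn);

	/* One outbound message; demo_complete() frees it later. */
	buf = kzalloc(sizeof(*buf) + 16, GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_conn;
	}
	buf->dma_dir = DMA_TO_DEVICE;
	buf->sg[0].data = buf + 1;	/* payload right after the struct */
	buf->sg[0].size = 16;
	buf->complete = demo_complete;

	err = mlx5_fpga_sbu_conn_sendmsg(conn, buf);
	if (err) {
		kfree(buf);
		goto err_conn;
	}
	/* A real client would keep conn and destroy it on teardown. */
	return 0;

err_conn:
	mlx5_fpga_sbu_conn_destroy(conn);
	return err;
}
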
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
new file mode 100644
index 000000000..32d4c9674
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -0,0 +1,1100 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/mlx5_ifc.h>
+
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "fs_ft_pool.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table_attr *ft_attr,
+ struct mlx5_flow_table *next_ft)
+{
+ int max_fte = ft_attr->max_fte;
+
+ ft->max_fte = max_fte ? roundup_pow_of_two(max_fte) : 1;
+
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ struct mlx5_flow_group *fg)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_update_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat_params *params,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ return 0;
+}
+
+static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+}
+
+static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ return 0;
+}
+
+static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+}
+
+static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
+{
+ return 0;
+}
+
+static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
+}
+
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ bool ft_id_valid,
+ u32 ft_id)
+{
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+ if (ft_id_valid) {
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ft_id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
+static int
+mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+ int definer_id)
+{
+ return 0;
+}
+
+static int
+mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask)
+{
+ return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft, u32 underlay_qpn,
+ bool disconnect)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+ int err;
+
+ if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
+ underlay_qpn == 0)
+ return 0;
+
+ if (ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ !mlx5_lag_is_master(dev))
+ return 0;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
+
+ if (disconnect)
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 1);
+ else
+ MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+
+ MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
+ MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
+ MLX5_SET(set_flow_table_root_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+
+ err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ if (!err &&
+ ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ mlx5_lag_is_master(dev)) {
+ err = mlx5_cmd_set_slave_root_fdb(dev,
+ mlx5_lag_get_peer_mdev(dev),
+ !disconnect, (!disconnect) ?
+ ft->id : 0);
+ if (err && !disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ns->root_ft->id);
+ mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ }
+ }
+
+ return err;
+}
+
+static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table_attr *ft_attr,
+ struct mlx5_flow_table *next_ft)
+{
+ int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
+ int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+ int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION);
+ u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+ unsigned int size;
+ int err;
+
+ size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte);
+ if (!size)
+ return -ENOSPC;
+
+ MLX5_SET(create_flow_table_in, in, opcode,
+ MLX5_CMD_OP_CREATE_FLOW_TABLE);
+
+ MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid);
+ MLX5_SET(create_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? ilog2(size) : 0);
+ MLX5_SET(create_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(create_flow_table_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+
+ MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
+ en_decap);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
+ en_encap);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table,
+ term);
+
+ switch (ft->op_mod) {
+ case FS_FT_OP_MOD_NORMAL:
+ if (next_ft) {
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.table_miss_action,
+ MLX5_FLOW_TABLE_MISS_ACTION_FWD);
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.table_miss_id, next_ft->id);
+ } else {
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.table_miss_action,
+ ft->def_miss_action);
+ }
+ break;
+
+ case FS_FT_OP_MOD_LAG_DEMUX:
+ MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
+ if (next_ft)
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id,
+ next_ft->id);
+ break;
+ }
+
+ err = mlx5_cmd_exec_inout(dev, create_flow_table, in, out);
+ if (!err) {
+ ft->id = MLX5_GET(create_flow_table_out, out,
+ table_id);
+ ft->max_fte = size;
+ } else {
+ mlx5_ft_pool_put_sz(ns->dev, size);
+ }
+
+ return err;
+}
+
+static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+ int err;
+
+ MLX5_SET(destroy_flow_table_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ MLX5_SET(destroy_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(destroy_flow_table_in, in, table_id, ft->id);
+ MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_table_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+
+ err = mlx5_cmd_exec_in(dev, destroy_flow_table, in);
+ if (!err)
+ mlx5_ft_pool_put_sz(ns->dev, ft->max_fte);
+
+ return err;
+}
+
+static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+
+ MLX5_SET(modify_flow_table_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_FLOW_TABLE);
+ MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
+
+ if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) {
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id, next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id, 0);
+ }
+ } else {
+ MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(modify_flow_table_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_action,
+ MLX5_FLOW_TABLE_MISS_ACTION_FWD);
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_id,
+ next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_action,
+ ft->def_miss_action);
+ }
+ }
+
+ return mlx5_cmd_exec_in(dev, modify_flow_table, in);
+}
+
+static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ struct mlx5_flow_group *fg)
+{
+ u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+ int err;
+
+ MLX5_SET(create_flow_group_in, in, opcode,
+ MLX5_CMD_OP_CREATE_FLOW_GROUP);
+ MLX5_SET(create_flow_group_in, in, table_type, ft->type);
+ MLX5_SET(create_flow_group_in, in, table_id, ft->id);
+ MLX5_SET(create_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(create_flow_group_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+ err = mlx5_cmd_exec_inout(dev, create_flow_group, in, out);
+ if (!err)
+ fg->id = MLX5_GET(create_flow_group_out, out,
+ group_id);
+ return err;
+}
+
+static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+
+ MLX5_SET(destroy_flow_group_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ MLX5_SET(destroy_flow_group_in, in, table_type, ft->type);
+ MLX5_SET(destroy_flow_group_in, in, table_id, ft->id);
+ MLX5_SET(destroy_flow_group_in, in, group_id, fg->id);
+ MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_group_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+ return mlx5_cmd_exec_in(dev, destroy_flow_group, in);
+}
+
+static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
+ struct fs_fte *fte, bool *extended_dest)
+{
+ int fw_log_max_fdb_encap_uplink =
+ MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+ int num_fwd_destinations = 0;
+ struct mlx5_flow_rule *dst;
+ int num_encap = 0;
+
+ *extended_dest = false;
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_NONE)
+ continue;
+ if ((dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
+ dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ num_encap++;
+ num_fwd_destinations++;
+ }
+ if (num_fwd_destinations > 1 && num_encap > 0)
+ *extended_dest = true;
+
+ if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+ mlx5_core_warn(dev, "FW does not support extended destination");
+ return -EOPNOTSUPP;
+ }
+ if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+ mlx5_core_warn(dev, "FW does not support more than %d encaps",
+ 1 << fw_log_max_fdb_encap_uplink);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void
+mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context)
+{
+ void *exe_aso_ctrl;
+ void *execute_aso;
+
+ execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context,
+ execute_aso[0]);
+ MLX5_SET(execute_aso, execute_aso, valid, 1);
+ MLX5_SET(execute_aso, execute_aso, aso_object_id,
+ fte->action.exe_aso.object_id);
+
+ exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id,
+ fte->action.exe_aso.return_reg_id);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type,
+ fte->action.exe_aso.type);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color,
+ fte->action.exe_aso.flow_meter.init_color);
+ MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id,
+ fte->action.exe_aso.flow_meter.meter_idx);
+}
+
+static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
+ int opmod, int modify_mask,
+ struct mlx5_flow_table *ft,
+ unsigned group_id,
+ struct fs_fte *fte)
+{
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+ bool extended_dest = false;
+ struct mlx5_flow_rule *dst;
+ void *in_flow_context, *vlan;
+ void *in_match_value;
+ unsigned int inlen;
+ int dst_cnt_size;
+ void *in_dests;
+ u32 *in;
+ int err;
+
+ if (mlx5_set_extended_dest(dev, fte, &extended_dest))
+ return -EOPNOTSUPP;
+
+ if (!extended_dest)
+ dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+ else
+ dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+ MLX5_SET(set_fte_in, in, op_mod, opmod);
+ MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask);
+ MLX5_SET(set_fte_in, in, table_type, ft->type);
+ MLX5_SET(set_fte_in, in, table_id, ft->id);
+ MLX5_SET(set_fte_in, in, flow_index, fte->index);
+ MLX5_SET(set_fte_in, in, ignore_flow_level,
+ !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL));
+
+ MLX5_SET(set_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(set_fte_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+
+ in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+ MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+
+ MLX5_SET(flow_context, in_flow_context, flow_tag,
+ fte->flow_context.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_source,
+ fte->flow_context.flow_source);
+
+ MLX5_SET(flow_context, in_flow_context, extended_destination,
+ extended_dest);
+ if (extended_dest) {
+ u32 action;
+
+ action = fte->action.action &
+ ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ MLX5_SET(flow_context, in_flow_context, action, action);
+ } else {
+ MLX5_SET(flow_context, in_flow_context, action,
+ fte->action.action);
+ if (fte->action.pkt_reformat)
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.pkt_reformat->id);
+ }
+ if (fte->action.modify_hdr)
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_hdr->id);
+
+ MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type,
+ fte->action.crypto.type);
+ MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_obj_id,
+ fte->action.crypto.obj_id);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
+
+ in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
+ match_value);
+ memcpy(in_match_value, &fte->val, sizeof(fte->val));
+
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ int list_size = 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ enum mlx5_flow_destination_type type = dst->dest_attr.type;
+ enum mlx5_ifc_flow_destination_type ifc_type;
+ unsigned int id;
+
+ if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ continue;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ id = dst->dest_attr.ft_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ id = dst->dest_attr.ft->id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid,
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id,
+ dst->dest_attr.vport.vhca_id);
+ if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) {
+ /* destination_id is reserved */
+ id = 0;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK;
+ break;
+ }
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT;
+ id = dst->dest_attr.vport.num;
+ if (extended_dest &&
+ dst->dest_attr.vport.pkt_reformat) {
+ MLX5_SET(dest_format_struct, in_dests,
+ packet_reformat,
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+ MLX5_SET(extended_dest_format, in_dests,
+ packet_reformat_id,
+ dst->dest_attr.vport.pkt_reformat->id);
+ }
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ id = dst->dest_attr.sampler_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+ break;
+ default:
+ id = dst->dest_attr.tir_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
+ }
+
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ ifc_type);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+ in_dests += dst_cnt_size;
+ list_size++;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, destination_list_size,
+ list_size);
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
+ log_max_flow_counter,
+ ft->type));
+ int list_size = 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (dst->dest_attr.type !=
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
+ dst->dest_attr.counter_id);
+ in_dests += dst_cnt_size;
+ list_size++;
+ }
+ if (list_size > max_list_size) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, flow_counter_list_size,
+ list_size);
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) {
+ mlx5_cmd_set_fte_flow_meter(fte, in_flow_context);
+ } else {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+ }
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+err_out:
+ kvfree(in);
+ return err;
+}
+
+static int mlx5_cmd_create_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ struct mlx5_core_dev *dev = ns->dev;
+ unsigned int group_id = group->id;
+
+ return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
+}
+
+static int mlx5_cmd_update_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ int opmod;
+ struct mlx5_core_dev *dev = ns->dev;
+ int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.
+ flow_modify_en);
+ if (!atomic_mod_cap)
+ return -EOPNOTSUPP;
+ opmod = 1;
+
+ return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, fg->id, fte);
+}
+
+static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+
+ MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ MLX5_SET(delete_fte_in, in, table_type, ft->type);
+ MLX5_SET(delete_fte_in, in, table_id, ft->id);
+ MLX5_SET(delete_fte_in, in, flow_index, fte->index);
+ MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(delete_fte_in, in, other_vport,
+ !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
+
+ return mlx5_cmd_exec_in(dev, delete_fte, in);
+}
+
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+ enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+ u32 *id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+ MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask);
+
+ err = mlx5_cmd_exec_inout(dev, alloc_flow_counter, in, out);
+ if (!err)
+ *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ return err;
+}
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+{
+ return mlx5_cmd_fc_bulk_alloc(dev, 0, id);
+}
+
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {};
+
+ MLX5_SET(dealloc_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
+ return mlx5_cmd_exec_in(dev, dealloc_flow_counter, in);
+}
+
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+ u64 *packets, u64 *bytes)
+{
+ u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+ MLX5_ST_SZ_BYTES(traffic_counter)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
+ void *stats;
+ int err = 0;
+
+ MLX5_SET(query_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+ MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+ MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
+ *packets = MLX5_GET64(traffic_counter, stats, packets);
+ *bytes = MLX5_GET64(traffic_counter, stats, octets);
+ return 0;
+}
+
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len)
+{
+ return MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+ MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len;
+}
+
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+ u32 *out)
+{
+ int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
+
+ MLX5_SET(query_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+ MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id);
+ MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
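
mlx5_cmd_fc_get_bulk_query_out_len() and mlx5_cmd_fc_bulk_query() are meant to be used as a pair: size the output buffer for bulk_len counters, issue a single QUERY_FLOW_COUNTER command, then walk the returned traffic_counter array. The sketch below shows that flow; the demo_* name is hypothetical and the buffer handling is simplified compared to the driver's flow-counter statistics code, which is the real consumer of these helpers.

/* Sketch: query bulk_len counters starting at base_id and log them.
 * Assumes bulk_len respects the device's bulk-query limits and that
 * the usual mlx5/slab headers are available, as in this file.
 */
static int demo_query_fc_bulk(struct mlx5_core_dev *dev, u32 base_id,
			      int bulk_len)
{
	int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
	void *stats;
	u32 *out;
	int err;
	int i;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	err = mlx5_cmd_fc_bulk_query(dev, base_id, bulk_len, out);
	if (err)
		goto out;

	for (i = 0; i < bulk_len; i++) {
		stats = MLX5_ADDR_OF(query_flow_counter_out, out,
				     flow_statistics[i]);
		mlx5_core_dbg(dev, "counter %u: packets %llu bytes %llu\n",
			      base_id + i,
			      MLX5_GET64(traffic_counter, stats, packets),
			      MLX5_GET64(traffic_counter, stats, octets));
	}
out:
	kvfree(out);
	return err;
}
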
+
+static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat_params *params,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+ void *packet_reformat_context_in;
+ int max_encap_size;
+ void *reformat;
+ int inlen;
+ int err;
+ u32 *in;
+
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB ||
+ namespace == MLX5_FLOW_NAMESPACE_FDB_BYPASS)
+ max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
+ else
+ max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size);
+
+ if (params->size > max_encap_size) {
+ mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
+ params->size, max_encap_size);
+ return -EINVAL;
+ }
+
+ in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) +
+ params->size, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in,
+ in, packet_reformat_context);
+ reformat = MLX5_ADDR_OF(packet_reformat_context_in,
+ packet_reformat_context_in,
+ reformat_data);
+ inlen = reformat - (void *)in + params->size;
+
+ MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_data_size, params->size);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_type, params->type);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_param_0, params->param_0);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_param_1, params->param_1);
+ if (params->data && params->size)
+ memcpy(reformat, params->data, params->size);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+ pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out,
+ out, packet_reformat_id);
+ kfree(in);
+ return err;
+}
+
+static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+
+ MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+ pkt_reformat->id);
+
+ mlx5_cmd_exec_in(dev, dealloc_packet_reformat_context, in);
+}
+
+static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {};
+ int max_actions, actions_size, inlen, err;
+ struct mlx5_core_dev *dev = ns->dev;
+ void *actions_in;
+ u8 table_type;
+ u32 *in;
+
+ switch (namespace) {
+ case MLX5_FLOW_NAMESPACE_FDB:
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions);
+ table_type = FS_FT_FDB;
+ break;
+ case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC:
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
+ table_type = FS_FT_NIC_RX;
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC:
+ case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC:
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
+ table_type = FS_FT_NIC_TX;
+ break;
+ case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
+ table_type = FS_FT_ESW_INGRESS_ACL;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ max_actions = MLX5_CAP_FLOWTABLE_RDMA_TX(dev, max_modify_header_actions);
+ table_type = FS_FT_RDMA_TX;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (num_actions > max_actions) {
+ mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n",
+ num_actions, max_actions);
+ return -EOPNOTSUPP;
+ }
+
+ actions_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * num_actions;
+ inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(alloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type);
+ MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions);
+
+ actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
+ memcpy(actions_in, modify_actions, actions_size);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+ modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+ kfree(in);
+ return err;
+}
+
+static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+
+ MLX5_SET(dealloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
+ modify_hdr->id);
+
+ mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in);
+}
+
+static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+ int definer_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_OBJ_TYPE_MATCH_DEFINER);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id);
+
+ return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {};
+ struct mlx5_core_dev *dev = ns->dev;
+ void *ptr;
+ int err;
+
+ MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type,
+ MLX5_OBJ_TYPE_MATCH_DEFINER);
+
+ ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context);
+ MLX5_SET(match_definer, ptr, format_id, format_id);
+
+ ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask);
+ memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask));
+
+ err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out);
+ return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+}
+
+static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ return 0;
+}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds = {
+ .create_flow_table = mlx5_cmd_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_modify_flow_table,
+ .create_flow_group = mlx5_cmd_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_destroy_flow_group,
+ .create_fte = mlx5_cmd_create_fte,
+ .update_fte = mlx5_cmd_update_fte,
+ .delete_fte = mlx5_cmd_delete_fte,
+ .update_root_ft = mlx5_cmd_update_root_ft,
+ .packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc,
+ .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
+ .modify_header_alloc = mlx5_cmd_modify_header_alloc,
+ .modify_header_dealloc = mlx5_cmd_modify_header_dealloc,
+ .create_match_definer = mlx5_cmd_create_match_definer,
+ .destroy_match_definer = mlx5_cmd_destroy_match_definer,
+ .set_peer = mlx5_cmd_stub_set_peer,
+ .create_ns = mlx5_cmd_stub_create_ns,
+ .destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_get_capabilities,
+};
+
+static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
+ .create_flow_table = mlx5_cmd_stub_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_stub_modify_flow_table,
+ .create_flow_group = mlx5_cmd_stub_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group,
+ .create_fte = mlx5_cmd_stub_create_fte,
+ .update_fte = mlx5_cmd_stub_update_fte,
+ .delete_fte = mlx5_cmd_stub_delete_fte,
+ .update_root_ft = mlx5_cmd_stub_update_root_ft,
+ .packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc,
+ .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
+ .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
+ .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc,
+ .create_match_definer = mlx5_cmd_stub_create_match_definer,
+ .destroy_match_definer = mlx5_cmd_stub_destroy_match_definer,
+ .set_peer = mlx5_cmd_stub_set_peer,
+ .create_ns = mlx5_cmd_stub_create_ns,
+ .destroy_ns = mlx5_cmd_stub_destroy_ns,
+ .get_capabilities = mlx5_cmd_stub_get_capabilities,
+};
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+{
+ return &mlx5_flow_cmds;
+}
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void)
+{
+ return &mlx5_flow_cmd_stubs;
+}
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ case FS_FT_ESW_EGRESS_ACL:
+ case FS_FT_ESW_INGRESS_ACL:
+ case FS_FT_FDB:
+ case FS_FT_SNIFFER_RX:
+ case FS_FT_SNIFFER_TX:
+ case FS_FT_NIC_TX:
+ case FS_FT_RDMA_RX:
+ case FS_FT_RDMA_TX:
+ case FS_FT_PORT_SEL:
+ return mlx5_fs_cmd_get_fw_cmds();
+ default:
+ return mlx5_fs_cmd_get_stub_cmds();
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
new file mode 100644
index 000000000..8ef4254b9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CMD_
+#define _MLX5_FS_CMD_
+
+#include "fs_core.h"
+
+struct mlx5_flow_cmds {
+ int (*create_flow_table)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table_attr *ft_attr,
+ struct mlx5_flow_table *next_ft);
+ int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft);
+
+ int (*modify_flow_table)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft);
+
+ int (*create_flow_group)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ struct mlx5_flow_group *fg);
+
+ int (*destroy_flow_group)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg);
+
+ int (*create_fte)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte);
+
+ int (*update_fte)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ int modify_mask,
+ struct fs_fte *fte);
+
+ int (*delete_fte)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte);
+
+ int (*update_root_ft)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect);
+
+ int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat_params *params,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat);
+
+ void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat);
+
+ int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr);
+
+ void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr);
+
+ int (*set_peer)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns);
+
+ int (*create_ns)(struct mlx5_flow_root_namespace *ns);
+ int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
+ int (*create_match_definer)(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask);
+ int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns,
+ int definer_id);
+
+ u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type);
+};
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+ enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+ u32 *id);
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+ u64 *packets, u64 *bytes);
+
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len);
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+ u32 *out);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
new file mode 100644
index 000000000..e6674118b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -0,0 +1,3612 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eswitch.h>
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "fs_ft_pool.h"
+#include "diag/fs_tracepoint.h"
+
+#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
+ sizeof(struct init_tree_node))
+
+#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
+ ...) {.type = FS_TYPE_PRIO,\
+ .min_ft_level = min_level_val,\
+ .num_levels = num_levels_val,\
+ .num_leaf_prios = num_prios_val,\
+ .caps = caps_val,\
+ .children = (struct init_tree_node[]) {__VA_ARGS__},\
+ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
+ ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
+ __VA_ARGS__)\
+
+#define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE, \
+ .def_miss_action = def_miss_act,\
+ .children = (struct init_tree_node[]) {__VA_ARGS__},\
+ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
+ sizeof(long))
+
+#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
+
+#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
+ .caps = (long[]) {__VA_ARGS__} }
+
+#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_receive.modify_root), \
+ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
+
+#define FS_CHAINING_CAPS_EGRESS \
+ FS_REQUIRED_CAPS( \
+ FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_transmit.modify_root), \
+ FS_CAP(flow_table_properties_nic_transmit \
+ .identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
+
+#define FS_CHAINING_CAPS_RDMA_TX \
+ FS_REQUIRED_CAPS( \
+ FS_CAP(flow_table_properties_nic_transmit_rdma.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_transmit_rdma.modify_root), \
+ FS_CAP(flow_table_properties_nic_transmit_rdma \
+ .identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_transmit_rdma \
+ .flow_table_modify))
+
+#define LEFTOVERS_NUM_LEVELS 1
+#define LEFTOVERS_NUM_PRIOS 1
+
+#define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1
+#define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1
+
+#define BY_PASS_PRIO_NUM_LEVELS 1
+#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
+ LEFTOVERS_NUM_PRIOS)
+
+#define KERNEL_RX_MACSEC_NUM_PRIOS 1
+#define KERNEL_RX_MACSEC_NUM_LEVELS 2
+#define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS)
+
+#define ETHTOOL_PRIO_NUM_LEVELS 1
+#define ETHTOOL_NUM_PRIOS 11
+#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
+/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */
+#define KERNEL_NIC_PRIO_NUM_LEVELS 7
+#define KERNEL_NIC_NUM_PRIOS 1
+/* One more level for tc */
+#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
+
+#define KERNEL_NIC_TC_NUM_PRIOS 1
+#define KERNEL_NIC_TC_NUM_LEVELS 3
+
+#define ANCHOR_NUM_LEVELS 1
+#define ANCHOR_NUM_PRIOS 1
+#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
+
+#define OFFLOADS_MAX_FT 2
+#define OFFLOADS_NUM_PRIOS 2
+#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS)
+
+#define LAG_PRIO_NUM_LEVELS 1
+#define LAG_NUM_PRIOS 1
+#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1)
+
+#define KERNEL_TX_IPSEC_NUM_PRIOS 1
+#define KERNEL_TX_IPSEC_NUM_LEVELS 1
+#define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS)
+
+#define KERNEL_TX_MACSEC_NUM_PRIOS 1
+#define KERNEL_TX_MACSEC_NUM_LEVELS 2
+#define KERNEL_TX_MACSEC_MIN_LEVEL (KERNEL_TX_IPSEC_MIN_LEVEL + KERNEL_TX_MACSEC_NUM_PRIOS)
+
+struct node_caps {
+ size_t arr_sz;
+ long *caps;
+};
+
+static struct init_tree_node {
+ enum fs_node_type type;
+ struct init_tree_node *children;
+ int ar_size;
+ struct node_caps caps;
+ int min_ft_level;
+ int num_leaf_prios;
+ int prio;
+ int num_levels;
+ enum mlx5_flow_table_miss_action def_miss_action;
+} root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 8,
+ .children = (struct init_tree_node[]){
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_RX_MACSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_RX_MACSEC_NUM_PRIOS,
+ KERNEL_RX_MACSEC_NUM_LEVELS))),
+ ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
+ LAG_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS,
+ OFFLOADS_MAX_FT))),
+ ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
+ ETHTOOL_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS,
+ KERNEL_NIC_TC_NUM_LEVELS),
+ ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
+ KERNEL_NIC_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS,
+ LEFTOVERS_NUM_LEVELS))),
+ ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS,
+ ANCHOR_NUM_LEVELS))),
+ }
+};
+
+static struct init_tree_node egress_root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 3,
+ .children = (struct init_tree_node[]) {
+ ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_TX_IPSEC_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
+ KERNEL_TX_IPSEC_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_TX_MACSEC_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_TX_MACSEC_NUM_PRIOS,
+ KERNEL_TX_MACSEC_NUM_LEVELS))),
+ }
+};
+
+enum {
+ RDMA_RX_COUNTERS_PRIO,
+ RDMA_RX_BYPASS_PRIO,
+ RDMA_RX_KERNEL_PRIO,
+};
+
+#define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS
+#define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1)
+#define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2)
+
+static struct init_tree_node rdma_rx_root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 3,
+ .children = (struct init_tree_node[]) {
+ [RDMA_RX_COUNTERS_PRIO] =
+ ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS,
+ RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))),
+ [RDMA_RX_BYPASS_PRIO] =
+ ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ [RDMA_RX_KERNEL_PRIO] =
+ ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
+ ADD_MULTIPLE_PRIO(1, 1))),
+ }
+};
+
+enum {
+ RDMA_TX_COUNTERS_PRIO,
+ RDMA_TX_BYPASS_PRIO,
+};
+
+#define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS
+#define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1)
+
+static struct init_tree_node rdma_tx_root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 2,
+ .children = (struct init_tree_node[]) {
+ [RDMA_TX_COUNTERS_PRIO] =
+ ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS,
+ RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))),
+ [RDMA_TX_BYPASS_PRIO] =
+ ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_RDMA_TX,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ }
+};
+
+enum fs_i_lock_class {
+ FS_LOCK_GRANDPARENT,
+ FS_LOCK_PARENT,
+ FS_LOCK_CHILD
+};
+
+static const struct rhashtable_params rhash_fte = {
+ .key_len = sizeof_field(struct fs_fte, val),
+ .key_offset = offsetof(struct fs_fte, val),
+ .head_offset = offsetof(struct fs_fte, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+static const struct rhashtable_params rhash_fg = {
+ .key_len = sizeof_field(struct mlx5_flow_group, mask),
+ .key_offset = offsetof(struct mlx5_flow_group, mask),
+ .head_offset = offsetof(struct mlx5_flow_group, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+static void del_hw_flow_table(struct fs_node *node);
+static void del_hw_flow_group(struct fs_node *node);
+static void del_hw_fte(struct fs_node *node);
+static void del_sw_flow_table(struct fs_node *node);
+static void del_sw_flow_group(struct fs_node *node);
+static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
+/* Deleting a rule (destination) is a special case that
+ * requires locking the FTE for the whole deletion process.
+ */
+static void del_sw_hw_rule(struct fs_node *node);
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+ struct mlx5_flow_destination *d2);
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
+static struct mlx5_flow_rule *
+find_flow_rule(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest);
+
+static void tree_init_node(struct fs_node *node,
+ void (*del_hw_func)(struct fs_node *),
+ void (*del_sw_func)(struct fs_node *))
+{
+ refcount_set(&node->refcount, 1);
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->children);
+ init_rwsem(&node->lock);
+ node->del_hw_func = del_hw_func;
+ node->del_sw_func = del_sw_func;
+ node->active = false;
+}
+
+static void tree_add_node(struct fs_node *node, struct fs_node *parent)
+{
+ if (parent)
+ refcount_inc(&parent->refcount);
+ node->parent = parent;
+
+ /* Parent is the root */
+ if (!parent)
+ node->root = node;
+ else
+ node->root = parent->root;
+}
+
+static int tree_get_node(struct fs_node *node)
+{
+ return refcount_inc_not_zero(&node->refcount);
+}
+
+static void nested_down_read_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
+{
+ if (node) {
+ down_read_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void nested_down_write_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
+{
+ if (node) {
+ down_write_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void down_write_ref_node(struct fs_node *node, bool locked)
+{
+ if (node) {
+ if (!locked)
+ down_write(&node->lock);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void up_read_ref_node(struct fs_node *node)
+{
+ refcount_dec(&node->refcount);
+ up_read(&node->lock);
+}
+
+static void up_write_ref_node(struct fs_node *node, bool locked)
+{
+ refcount_dec(&node->refcount);
+ if (!locked)
+ up_write(&node->lock);
+}
+
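+/* Drop a reference on @node. When the last reference goes away, destroy the
+ * HW object (del_hw_func), unlink the node from its parent, free it via
+ * del_sw_func and then release the reference held on the parent.
+ */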
+static void tree_put_node(struct fs_node *node, bool locked)
+{
+ struct fs_node *parent_node = node->parent;
+
+ if (refcount_dec_and_test(&node->refcount)) {
+ if (node->del_hw_func)
+ node->del_hw_func(node);
+ if (parent_node) {
+ down_write_ref_node(parent_node, locked);
+ list_del_init(&node->list);
+ }
+ node->del_sw_func(node);
+ if (parent_node)
+ up_write_ref_node(parent_node, locked);
+ node = NULL;
+ }
+ if (!node && parent_node)
+ tree_put_node(parent_node, locked);
+}
+
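+/* Remove @node only when the caller holds the last reference; otherwise just
+ * drop the caller's reference and return -EEXIST.
+ */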
+static int tree_remove_node(struct fs_node *node, bool locked)
+{
+ if (refcount_read(&node->refcount) > 1) {
+ refcount_dec(&node->refcount);
+ return -EEXIST;
+ }
+ tree_put_node(node, locked);
+ return 0;
+}
+
+static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio)
+{
+ struct fs_prio *iter_prio;
+
+ fs_for_each_prio(iter_prio, ns) {
+ if (iter_prio->prio == prio)
+ return iter_prio;
+ }
+
+ return NULL;
+}
+
+static bool is_fwd_next_action(u32 action)
+{
+ return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
+}
+
+static bool is_fwd_dest_type(enum mlx5_flow_destination_type type)
+{
+ return type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM ||
+ type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE ||
+ type == MLX5_FLOW_DESTINATION_TYPE_UPLINK ||
+ type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER ||
+ type == MLX5_FLOW_DESTINATION_TYPE_TIR;
+}
+
+static bool check_valid_spec(const struct mlx5_flow_spec *spec)
+{
+ int i;
+
+ for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
+ if (spec->match_value[i] & ~spec->match_criteria[i]) {
+ pr_warn("mlx5_core: match_value differs from match_criteria\n");
+ return false;
+ }
+
+ return true;
+}
+
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+{
+ struct fs_node *root;
+ struct mlx5_flow_namespace *ns;
+
+ root = node->root;
+
+ if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
+ pr_warn("mlx5: flow steering node is not in tree or is corrupted\n");
+ return NULL;
+ }
+
+ ns = container_of(root, struct mlx5_flow_namespace, node);
+ return container_of(ns, struct mlx5_flow_root_namespace, ns);
+}
+
+static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root = find_root(node);
+
+ if (root)
+ return root->dev->priv.steering;
+ return NULL;
+}
+
+static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root = find_root(node);
+
+ if (root)
+ return root->dev;
+ return NULL;
+}
+
+static void del_sw_ns(struct fs_node *node)
+{
+ kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+ kfree(node);
+}
+
+static void del_hw_flow_table(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_core_dev *dev;
+ int err;
+
+ fs_get_obj(ft, node);
+ dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
+ trace_mlx5_fs_del_ft(ft);
+
+ if (node->active) {
+ err = root->cmds->destroy_flow_table(root, ft);
+ if (err)
+ mlx5_core_warn(dev, "flow steering can't destroy ft\n");
+ }
+}
+
+static void del_sw_flow_table(struct fs_node *node)
+{
+ struct mlx5_flow_table *ft;
+ struct fs_prio *prio;
+
+ fs_get_obj(ft, node);
+
+ rhltable_destroy(&ft->fgs_hash);
+ if (ft->node.parent) {
+ fs_get_obj(prio, ft->node.parent);
+ prio->num_ft--;
+ }
+ kfree(ft);
+}
+
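+/* Push the changes accumulated in fte->modify_mask to FW and clear the mask. */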
+static void modify_fte(struct fs_fte *fte)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_core_dev *dev;
+ int err;
+
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+ dev = get_dev(&fte->node);
+
+ root = find_root(&ft->node);
+ err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte);
+ if (err)
+ mlx5_core_warn(dev,
+ "%s can't del rule fg id=%d fte_index=%d\n",
+ __func__, fg->id, fte->index);
+ fte->modify_mask = 0;
+}
+
+static void del_sw_hw_rule(struct fs_node *node)
+{
+ struct mlx5_flow_rule *rule;
+ struct fs_fte *fte;
+
+ fs_get_obj(rule, node);
+ fs_get_obj(fte, rule->node.parent);
+ trace_mlx5_fs_del_rule(rule);
+ if (is_fwd_next_action(rule->sw_action)) {
+ mutex_lock(&rule->dest_attr.ft->lock);
+ list_del(&rule->next_ft);
+ mutex_unlock(&rule->dest_attr.ft->lock);
+ }
+
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) {
+ --fte->dests_size;
+ fte->modify_mask |=
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ goto out;
+ }
+
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ --fte->dests_size;
+ fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ goto out;
+ }
+
+ if (is_fwd_dest_type(rule->dest_attr.type)) {
+ --fte->dests_size;
+ --fte->fwd_dests;
+
+ if (!fte->fwd_dests)
+ fte->action.action &=
+ ~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ fte->modify_mask |=
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ goto out;
+ }
+out:
+ kfree(rule);
+}
+
+static void del_hw_fte(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_core_dev *dev;
+ struct fs_fte *fte;
+ int err;
+
+ fs_get_obj(fte, node);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+
+ trace_mlx5_fs_del_fte(fte);
+ WARN_ON(fte->dests_size);
+ dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
+ if (node->active) {
+ err = root->cmds->delete_fte(root, ft, fte);
+ if (err)
+ mlx5_core_warn(dev,
+ "flow steering can't delete fte in index %d of flow group id %d\n",
+ fte->index, fg->id);
+ node->active = false;
+ }
+}
+
+static void del_sw_fte(struct fs_node *node)
+{
+ struct mlx5_flow_steering *steering = get_steering(node);
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int err;
+
+ fs_get_obj(fte, node);
+ fs_get_obj(fg, fte->node.parent);
+
+ err = rhashtable_remove_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ WARN_ON(err);
+ ida_free(&fg->fte_allocator, fte->index - fg->start_index);
+ kmem_cache_free(steering->ftes_cache, fte);
+}
+
+static void del_hw_flow_group(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_table *ft;
+ struct mlx5_core_dev *dev;
+
+ fs_get_obj(fg, node);
+ fs_get_obj(ft, fg->node.parent);
+ dev = get_dev(&ft->node);
+ trace_mlx5_fs_del_fg(fg);
+
+ root = find_root(&ft->node);
+ if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg))
+ mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
+ fg->id, ft->id);
+}
+
+static void del_sw_flow_group(struct fs_node *node)
+{
+ struct mlx5_flow_steering *steering = get_steering(node);
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ fs_get_obj(fg, node);
+ fs_get_obj(ft, fg->node.parent);
+
+ rhashtable_destroy(&fg->ftes_hash);
+ ida_destroy(&fg->fte_allocator);
+ if (ft->autogroup.active &&
+ fg->max_ftes == ft->autogroup.group_size &&
+ fg->start_index < ft->autogroup.max_fte)
+ ft->autogroup.num_groups--;
+ err = rhltable_remove(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ WARN_ON(err);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
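+/* Allocate an index for @fte within @fg, add it to the group's FTE hash
+ * table and link it as a child of the group node.
+ */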
+static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
+{
+ int index;
+ int ret;
+
+ index = ida_alloc_max(&fg->fte_allocator, fg->max_ftes - 1, GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ fte->index = index + fg->start_index;
+ ret = rhashtable_insert_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ if (ret)
+ goto err_ida_remove;
+
+ tree_add_node(&fte->node, &fg->node);
+ list_add_tail(&fte->node.list, &fg->node.children);
+ return 0;
+
+err_ida_remove:
+ ida_free(&fg->fte_allocator, index);
+ return ret;
+}
+
+static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
+ const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct fs_fte *fte;
+
+ fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
+ if (!fte)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(fte->val, &spec->match_value, sizeof(fte->val));
+ fte->node.type = FS_TYPE_FLOW_ENTRY;
+ fte->action = *flow_act;
+ fte->flow_context = spec->flow_context;
+
+ tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+
+ return fte;
+}
+
+static void dealloc_flow_group(struct mlx5_flow_steering *steering,
+ struct mlx5_flow_group *fg)
+{
+ rhashtable_destroy(&fg->ftes_hash);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
+ u8 match_criteria_enable,
+ const void *match_criteria,
+ int start_index,
+ int end_index)
+{
+ struct mlx5_flow_group *fg;
+ int ret;
+
+ fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
+ if (!fg)
+ return ERR_PTR(-ENOMEM);
+
+ ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
+ if (ret) {
+ kmem_cache_free(steering->fgs_cache, fg);
+ return ERR_PTR(ret);
+ }
+
+ ida_init(&fg->fte_allocator);
+ fg->mask.match_criteria_enable = match_criteria_enable;
+ memcpy(&fg->mask.match_criteria, match_criteria,
+ sizeof(fg->mask.match_criteria));
+ fg->node.type = FS_TYPE_FLOW_GROUP;
+ fg->start_index = start_index;
+ fg->max_ftes = end_index - start_index + 1;
+
+ return fg;
+}
+
+static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ const void *match_criteria,
+ int start_index,
+ int end_index,
+ struct list_head *prev)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *fg;
+ int ret;
+
+ fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
+ start_index, end_index);
+ if (IS_ERR(fg))
+ return fg;
+
+ /* initialize refcnt, add to parent list */
+ ret = rhltable_insert(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ if (ret) {
+ dealloc_flow_group(steering, fg);
+ return ERR_PTR(ret);
+ }
+
+ tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
+ tree_add_node(&fg->node, &ft->node);
+ /* Add node to group list */
+ list_add(&fg->node.list, prev);
+ atomic_inc(&ft->node.version);
+
+ return fg;
+}
+
+static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport,
+ enum fs_flow_table_type table_type,
+ enum fs_flow_table_op_mod op_mod,
+ u32 flags)
+{
+ struct mlx5_flow_table *ft;
+ int ret;
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
+ if (ret) {
+ kfree(ft);
+ return ERR_PTR(ret);
+ }
+
+ ft->level = level;
+ ft->node.type = FS_TYPE_FLOW_TABLE;
+ ft->op_mod = op_mod;
+ ft->type = table_type;
+ ft->vport = vport;
+ ft->flags = flags;
+ INIT_LIST_HEAD(&ft->fwd_rules);
+ mutex_init(&ft->lock);
+
+ return ft;
+}
+
+/* If reverse is false, we search for the first flow table in the
+ * root sub-tree starting from start (closest from the right); otherwise
+ * we search for the last flow table in the root sub-tree up to start
+ * (closest from the left).
+ */
+static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
+ struct list_head *start,
+ bool reverse)
+{
+#define list_advance_entry(pos, reverse) \
+ ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
+
+#define list_for_each_advance_continue(pos, head, reverse) \
+ for (pos = list_advance_entry(pos, reverse); \
+ &pos->list != (head); \
+ pos = list_advance_entry(pos, reverse))
+
+ struct fs_node *iter = list_entry(start, struct fs_node, list);
+ struct mlx5_flow_table *ft = NULL;
+
+ if (!root)
+ return NULL;
+
+ list_for_each_advance_continue(iter, &root->children, reverse) {
+ if (iter->type == FS_TYPE_FLOW_TABLE) {
+ fs_get_obj(ft, iter);
+ return ft;
+ }
+ ft = find_closest_ft_recursive(iter, &iter->children, reverse);
+ if (ft)
+ return ft;
+ }
+
+ return ft;
+}
+
+static struct fs_node *find_prio_chains_parent(struct fs_node *parent,
+ struct fs_node **child)
+{
+ struct fs_node *node = NULL;
+
+ while (parent && parent->type != FS_TYPE_PRIO_CHAINS) {
+ node = parent;
+ parent = parent->parent;
+ }
+
+ if (child)
+ *child = node;
+
+ return parent;
+}
+
+/* If reverse is false then return the first flow table next to the passed node
+ * in the tree, else return the last flow table before the node in the tree.
+ * If skip is true, skip the flow tables in the same prio_chains prio.
+ */
+static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse,
+ bool skip)
+{
+ struct fs_node *prio_chains_parent = NULL;
+ struct mlx5_flow_table *ft = NULL;
+ struct fs_node *curr_node;
+ struct fs_node *parent;
+
+ if (skip)
+ prio_chains_parent = find_prio_chains_parent(node, NULL);
+ parent = node->parent;
+ curr_node = node;
+ while (!ft && parent) {
+ if (parent != prio_chains_parent)
+ ft = find_closest_ft_recursive(parent, &curr_node->list,
+ reverse);
+ curr_node = parent;
+ parent = curr_node->parent;
+ }
+ return ft;
+}
+
+/* Assuming all the tree is locked by mutex chain lock */
+static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node)
+{
+ return find_closest_ft(node, false, true);
+}
+
+/* Assuming all the tree is locked by mutex chain lock */
+static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node)
+{
+ return find_closest_ft(node, true, true);
+}
+
+static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+ struct mlx5_flow_act *flow_act)
+{
+ struct fs_prio *prio;
+ bool next_ns;
+
+ next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+ fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
+
+ return find_next_chained_ft(&prio->node);
+}
+
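+/* Re-point every flow table in @prio to @ft as its next flow table. */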
+static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+ struct fs_prio *prio,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ struct mlx5_flow_table *iter;
+ int err;
+
+ fs_for_each_ft(iter, prio) {
+ err = root->cmds->modify_flow_table(root, iter, ft);
+ if (err) {
+ mlx5_core_err(dev,
+ "Failed to modify flow table id %d, type %d, err %d\n",
+ iter->id, iter->type, err);
+ /* The driver is out of sync with the FW */
+ return err;
+ }
+ }
+ return 0;
+}
+
+static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node,
+ struct fs_node *parent,
+ struct fs_node **child,
+ bool reverse)
+{
+ struct mlx5_flow_table *ft;
+
+ ft = find_closest_ft(node, reverse, false);
+
+ if (ft && parent == find_prio_chains_parent(&ft->node, child))
+ return ft;
+
+ return NULL;
+}
+
+/* Connect flow tables from previous priority of prio to ft */
+static int connect_prev_fts(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct fs_node *prio_parent, *parent = NULL, *child, *node;
+ struct mlx5_flow_table *prev_ft;
+ int err = 0;
+
+ prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+ /* return directly if not under the first sub ns of prio_chains prio */
+ if (prio_parent && !list_is_first(&child->list, &prio_parent->children))
+ return 0;
+
+ prev_ft = find_prev_chained_ft(&prio->node);
+ while (prev_ft) {
+ struct fs_prio *prev_prio;
+
+ fs_get_obj(prev_prio, prev_ft->node.parent);
+ err = connect_fts_in_prio(dev, prev_prio, ft);
+ if (err)
+ break;
+
+ if (!parent) {
+ parent = find_prio_chains_parent(&prev_prio->node, &child);
+ if (!parent)
+ break;
+ }
+
+ node = child;
+ prev_ft = find_closet_ft_prio_chains(node, parent, &child, true);
+ }
+ return err;
+}
+
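+/* If @ft sits at a lower level than the current root flow table, make it the
+ * new root: the FW is updated once per underlay QPN, or once with QPN 0 when
+ * the underlay list is empty.
+ */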
+static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
+ *prio)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ struct mlx5_ft_underlay_qp *uqp;
+ int min_level = INT_MAX;
+ int err = 0;
+ u32 qpn;
+
+ if (root->root_ft)
+ min_level = root->root_ft->level;
+
+ if (ft->level >= min_level)
+ return 0;
+
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case QPN list is empty */
+ qpn = 0;
+ err = root->cmds->update_root_ft(root, ft, qpn, false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = root->cmds->update_root_ft(root, ft,
+ qpn, false);
+ if (err)
+ break;
+ }
+ }
+
+ if (err)
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
+ else
+ root->root_ft = ft;
+
+ return err;
+}
+
+static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ int err = 0;
+
+ fs_get_obj(fte, rule->node.parent);
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return -EINVAL;
+ down_write_ref_node(&fte->node, false);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ root = find_root(&ft->node);
+ err = root->cmds->update_fte(root, ft, fg,
+ modify_mask, fte);
+ up_write_ref_node(&fte->node, false);
+
+ return err;
+}
+
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
+ struct mlx5_flow_destination *new_dest,
+ struct mlx5_flow_destination *old_dest)
+{
+ int i;
+
+ if (!old_dest) {
+ if (handle->num_rules != 1)
+ return -EINVAL;
+ return _mlx5_modify_rule_destination(handle->rule[0],
+ new_dest);
+ }
+
+ for (i = 0; i < handle->num_rules; i++) {
+ if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
+ return _mlx5_modify_rule_destination(handle->rule[i],
+ new_dest);
+ }
+
+ return -EINVAL;
+}
+
+/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */
+static int connect_fwd_rules(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *new_next_ft,
+ struct mlx5_flow_table *old_next_ft)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_rule *iter;
+ int err = 0;
+
+ /* new_next_ft and old_next_ft could be NULL only
+ * when we create/destroy the anchor flow table.
+ */
+ if (!new_next_ft || !old_next_ft)
+ return 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = new_next_ft;
+
+ mutex_lock(&old_next_ft->lock);
+ list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
+ mutex_unlock(&old_next_ft->lock);
+ list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
+ if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) &&
+ iter->ft->ns == new_next_ft->ns)
+ continue;
+
+ err = _mlx5_modify_rule_destination(iter, &dest);
+ if (err)
+ pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
+ new_next_ft->id);
+ }
+ return 0;
+}
+
+static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct mlx5_flow_table *next_ft, *first_ft;
+ int err = 0;
+
+ /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
+
+ first_ft = list_first_entry_or_null(&prio->node.children,
+ struct mlx5_flow_table, node.list);
+ if (!first_ft || first_ft->level > ft->level) {
+ err = connect_prev_fts(dev, ft, prio);
+ if (err)
+ return err;
+
+ next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node);
+ err = connect_fwd_rules(dev, ft, next_ft);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.modify_root))
+ err = update_root_ft_create(ft, prio);
+ return err;
+}
+
+static void list_add_flow_table(struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct list_head *prev = &prio->node.children;
+ struct mlx5_flow_table *iter;
+
+ fs_for_each_ft(iter, prio) {
+ if (iter->level > ft->level)
+ break;
+ prev = &iter->node.list;
+ }
+ list_add(&ft->node.list, prev);
+}
+
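+/* Common flow table creation path: look up the priority, create the table in
+ * FW and (for managed tables) place it at the right level within the
+ * priority and connect it into the steering tree.
+ */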
+static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr,
+ enum fs_flow_table_op_mod op_mod,
+ u16 vport)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ns->node);
+ bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED;
+ struct mlx5_flow_table *next_ft;
+ struct fs_prio *fs_prio = NULL;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ if (!root) {
+ pr_err("mlx5: flow steering failed to find root of namespace\n");
+ return ERR_PTR(-ENODEV);
+ }
+
+ mutex_lock(&root->chain_lock);
+ fs_prio = find_prio(ns, ft_attr->prio);
+ if (!fs_prio) {
+ err = -EINVAL;
+ goto unlock_root;
+ }
+ if (!unmanaged) {
+ /* The level is related to the
+ * priority level range.
+ */
+ if (ft_attr->level >= fs_prio->num_levels) {
+ err = -ENOSPC;
+ goto unlock_root;
+ }
+
+ ft_attr->level += fs_prio->start_level;
+ }
+
+ ft = alloc_flow_table(ft_attr->level,
+ vport,
+ root->table_type,
+ op_mod, ft_attr->flags);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto unlock_root;
+ }
+
+ tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
+ next_ft = unmanaged ? ft_attr->next_ft :
+ find_next_chained_ft(&fs_prio->node);
+ ft->def_miss_action = ns->def_miss_action;
+ ft->ns = ns;
+ err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
+ if (err)
+ goto free_ft;
+
+ if (!unmanaged) {
+ err = connect_flow_table(root->dev, ft, fs_prio);
+ if (err)
+ goto destroy_ft;
+ }
+
+ ft->node.active = true;
+ down_write_ref_node(&fs_prio->node, false);
+ if (!unmanaged) {
+ tree_add_node(&ft->node, &fs_prio->node);
+ list_add_flow_table(ft, fs_prio);
+ } else {
+ ft->node.root = fs_prio->node.root;
+ }
+ fs_prio->num_ft++;
+ up_write_ref_node(&fs_prio->node, false);
+ mutex_unlock(&root->chain_lock);
+ trace_mlx5_fs_add_ft(ft);
+ return ft;
+destroy_ft:
+ root->cmds->destroy_flow_table(root, ft);
+free_ft:
+ rhltable_destroy(&ft->fgs_hash);
+ kfree(ft);
+unlock_root:
+ mutex_unlock(&root->chain_lock);
+ return ERR_PTR(err);
+}
+
+struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr)
+{
+ return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
+}
+EXPORT_SYMBOL(mlx5_create_flow_table);
+
+u32 mlx5_flow_table_id(struct mlx5_flow_table *ft)
+{
+ return ft->id;
+}
+EXPORT_SYMBOL(mlx5_flow_table_id);
+
+struct mlx5_flow_table *
+mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr, u16 vport)
+{
+ return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, vport);
+}
+
+struct mlx5_flow_table*
+mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
+ int prio, u32 level)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+
+ ft_attr.level = level;
+ ft_attr.prio = prio;
+ ft_attr.max_fte = 1;
+
+ return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
+}
+EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
+
+#define MAX_FLOW_GROUP_SIZE BIT(24)
+struct mlx5_flow_table*
+mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr)
+{
+ int num_reserved_entries = ft_attr->autogroup.num_reserved_entries;
+ int max_num_groups = ft_attr->autogroup.max_num_groups;
+ struct mlx5_flow_table *ft;
+ int autogroups_max_fte;
+
+ ft = mlx5_create_flow_table(ns, ft_attr);
+ if (IS_ERR(ft))
+ return ft;
+
+ autogroups_max_fte = ft->max_fte - num_reserved_entries;
+ if (max_num_groups > autogroups_max_fte)
+ goto err_validate;
+ if (num_reserved_entries > ft->max_fte)
+ goto err_validate;
+
+ /* Align the number of groups according to the largest group size */
+ if (autogroups_max_fte / (max_num_groups + 1) > MAX_FLOW_GROUP_SIZE)
+ max_num_groups = (autogroups_max_fte / MAX_FLOW_GROUP_SIZE) - 1;
+
+ ft->autogroup.active = true;
+ ft->autogroup.required_groups = max_num_groups;
+ ft->autogroup.max_fte = autogroups_max_fte;
+ /* Leave room for flow groups in addition to the max types */
+ ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1);
+
+ return ft;
+
+err_validate:
+ mlx5_destroy_flow_table(ft);
+ return ERR_PTR(-ENOSPC);
+}
+EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
+
+struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
+ u32 *fg_in)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ fg_in, match_criteria);
+ u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+ fg_in,
+ match_criteria_enable);
+ int start_index = MLX5_GET(create_flow_group_in, fg_in,
+ start_flow_index);
+ int end_index = MLX5_GET(create_flow_group_in, fg_in,
+ end_flow_index);
+ struct mlx5_flow_group *fg;
+ int err;
+
+ if (ft->autogroup.active && start_index < ft->autogroup.max_fte)
+ return ERR_PTR(-EPERM);
+
+ down_write_ref_node(&ft->node, false);
+ fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
+ start_index, end_index,
+ ft->node.children.prev);
+ up_write_ref_node(&ft->node, false);
+ if (IS_ERR(fg))
+ return fg;
+
+ err = root->cmds->create_flow_group(root, ft, fg_in, fg);
+ if (err) {
+ tree_put_node(&fg->node, false);
+ return ERR_PTR(err);
+ }
+ trace_mlx5_fs_add_fg(fg);
+ fg->node.active = true;
+
+ return fg;
+}
+EXPORT_SYMBOL(mlx5_create_flow_group);
+
+static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return NULL;
+
+ INIT_LIST_HEAD(&rule->next_ft);
+ rule->node.type = FS_TYPE_FLOW_DEST;
+ if (dest)
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ else
+ rule->dest_attr.type = MLX5_FLOW_DESTINATION_TYPE_NONE;
+
+ return rule;
+}
+
+static struct mlx5_flow_handle *alloc_handle(int num_rules)
+{
+ struct mlx5_flow_handle *handle;
+
+ handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
+ if (!handle)
+ return NULL;
+
+ handle->num_rules = num_rules;
+
+ return handle;
+}
+
+static void destroy_flow_handle(struct fs_fte *fte,
+ struct mlx5_flow_handle *handle,
+ struct mlx5_flow_destination *dest,
+ int i)
+{
+ for (; --i >= 0;) {
+ if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
+ fte->dests_size--;
+ list_del(&handle->rule[i]->node.list);
+ kfree(handle->rule[i]);
+ }
+ }
+ kfree(handle);
+}
+
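+/* Build a handle over @dest_num destinations. Destinations already attached
+ * to @fte are reused; new ones are allocated and linked (flow table
+ * destinations at the tail so forward-to-next-prio rules stay last).
+ * *modify_mask accumulates the FTE fields that need a FW update.
+ */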
+static struct mlx5_flow_handle *
+create_flow_handle(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ int *modify_mask,
+ bool *new_rule)
+{
+ struct mlx5_flow_handle *handle;
+ struct mlx5_flow_rule *rule = NULL;
+ static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ int type;
+ int i = 0;
+
+ handle = alloc_handle((dest_num) ? dest_num : 1);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+
+ do {
+ if (dest) {
+ rule = find_flow_rule(fte, dest + i);
+ if (rule) {
+ refcount_inc(&rule->node.refcount);
+ goto rule_found;
+ }
+ }
+
+ *new_rule = true;
+ rule = alloc_rule(dest + i);
+ if (!rule)
+ goto free_rules;
+
+ /* Add dest to the dests list - flow tables must be at the
+ * end of the list so that forward-to-next-prio rules come last.
+ */
+ tree_init_node(&rule->node, NULL, del_sw_hw_rule);
+ if (dest &&
+ dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ list_add(&rule->node.list, &fte->node.children);
+ else
+ list_add_tail(&rule->node.list, &fte->node.children);
+ if (dest) {
+ fte->dests_size++;
+
+ if (is_fwd_dest_type(dest[i].type))
+ fte->fwd_dests++;
+
+ type = dest[i].type ==
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ *modify_mask |= type ? count : dst;
+ }
+rule_found:
+ handle->rule[i] = rule;
+ } while (++i < dest_num);
+
+ return handle;
+
+free_rules:
+ destroy_flow_handle(fte, handle, dest, i);
+ return ERR_PTR(-ENOMEM);
+}
+
+/* fte should not be deleted while calling this function */
+static struct mlx5_flow_handle *
+add_rule_fte(struct fs_fte *fte,
+ struct mlx5_flow_group *fg,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ bool update_action)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_flow_table *ft;
+ int modify_mask = 0;
+ int err;
+ bool new_rule = false;
+
+ handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
+ &new_rule);
+ if (IS_ERR(handle) || !new_rule)
+ goto out;
+
+ if (update_action)
+ modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+
+ fs_get_obj(ft, fg->node.parent);
+ root = find_root(&fg->node);
+ if (!(fte->status & FS_FTE_STATUS_EXISTING))
+ err = root->cmds->create_fte(root, ft, fg, fte);
+ else
+ err = root->cmds->update_fte(root, ft, fg, modify_mask, fte);
+ if (err)
+ goto free_handle;
+
+ fte->node.active = true;
+ fte->status |= FS_FTE_STATUS_EXISTING;
+ atomic_inc(&fg->node.version);
+
+out:
+ return handle;
+
+free_handle:
+ destroy_flow_handle(fte, handle, dest, handle->num_rules);
+ return ERR_PTR(err);
+}
+
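+/* Find the first free index range in the autogrouped region large enough for
+ * a group of the configured size and insert a new flow group there.
+ */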
+static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
+ const struct mlx5_flow_spec *spec)
+{
+ struct list_head *prev = &ft->node.children;
+ u32 max_fte = ft->autogroup.max_fte;
+ unsigned int candidate_index = 0;
+ unsigned int group_size = 0;
+ struct mlx5_flow_group *fg;
+
+ if (!ft->autogroup.active)
+ return ERR_PTR(-ENOENT);
+
+ if (ft->autogroup.num_groups < ft->autogroup.required_groups)
+ group_size = ft->autogroup.group_size;
+
+ /* max_fte == ft->autogroup.max_types */
+ if (group_size == 0)
+ group_size = 1;
+
+ /* sorted by start_index */
+ fs_for_each_fg(fg, ft) {
+ if (candidate_index + group_size > fg->start_index)
+ candidate_index = fg->start_index + fg->max_ftes;
+ else
+ break;
+ prev = &fg->node.list;
+ }
+
+ if (candidate_index + group_size > max_fte)
+ return ERR_PTR(-ENOSPC);
+
+ fg = alloc_insert_flow_group(ft,
+ spec->match_criteria_enable,
+ spec->match_criteria,
+ candidate_index,
+ candidate_index + group_size - 1,
+ prev);
+ if (IS_ERR(fg))
+ goto out;
+
+ if (group_size == ft->autogroup.group_size)
+ ft->autogroup.num_groups++;
+
+out:
+ return fg;
+}
+
+static int create_auto_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *match_criteria_addr;
+ u8 src_esw_owner_mask_on;
+ void *misc;
+ int err;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ fg->mask.match_criteria_enable);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index +
+ fg->max_ftes - 1);
+
+ misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria,
+ misc_parameters);
+ src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, in,
+ source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on);
+
+ match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
+ in, match_criteria);
+ memcpy(match_criteria_addr, fg->mask.match_criteria,
+ sizeof(fg->mask.match_criteria));
+
+ err = root->cmds->create_flow_group(root, ft, in, fg);
+ if (!err) {
+ fg->node.active = true;
+ trace_mlx5_fs_add_fg(fg);
+ }
+
+ kvfree(in);
+ return err;
+}
+
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+ struct mlx5_flow_destination *d2)
+{
+ if (d1->type == d2->type) {
+ if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
+ d1->vport.num == d2->vport.num &&
+ d1->vport.flags == d2->vport.flags &&
+ ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
+ (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
+ ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
+ (d1->vport.pkt_reformat->id ==
+ d2->vport.pkt_reformat->id) : true)) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ d1->ft == d2->ft) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ d1->tir_num == d2->tir_num) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
+ d1->ft_num == d2->ft_num) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER &&
+ d1->sampler_id == d2->sampler_id))
+ return true;
+ }
+
+ return false;
+}
+
+static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+
+ list_for_each_entry(rule, &fte->node.children, node.list) {
+ if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
+ return rule;
+ }
+ return NULL;
+}
+
+static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0,
+ const struct mlx5_fs_vlan *vlan1)
+{
+ return vlan0->ethtype != vlan1->ethtype ||
+ vlan0->vid != vlan1->vid ||
+ vlan0->prio != vlan1->prio;
+}
+
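+/* Two FTEs with the same match value may share an entry only if their actions
+ * are compatible: count-only rules never conflict, while packet-modifying
+ * actions (and their reformat/modify-header/VLAN arguments) must agree.
+ */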
+static bool check_conflicting_actions(const struct mlx5_flow_act *act1,
+ const struct mlx5_flow_act *act2)
+{
+ u32 action1 = act1->action;
+ u32 action2 = act2->action;
+ u32 xored_actions;
+
+ xored_actions = action1 ^ action2;
+
+ /* if one rule only wants to count, it's ok */
+ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
+ action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return false;
+
+ if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
+ act1->pkt_reformat != act2->pkt_reformat)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ act1->modify_hdr != act2->modify_hdr)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0]))
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 &&
+ check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1]))
+ return true;
+
+ return false;
+}
+
+static int check_conflicting_ftes(struct fs_fte *fte,
+ const struct mlx5_flow_context *flow_context,
+ const struct mlx5_flow_act *flow_act)
+{
+ if (check_conflicting_actions(flow_act, &fte->action)) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "Found two FTEs with conflicting actions\n");
+ return -EEXIST;
+ }
+
+ if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
+ fte->flow_context.flow_tag != flow_context->flow_tag) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "FTE flow tag %u already exists with different flow tag %u\n",
+ fte->flow_context.flow_tag,
+ flow_context->flow_tag);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
+ const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ struct fs_fte *fte)
+{
+ struct mlx5_flow_handle *handle;
+ int old_action;
+ int i;
+ int ret;
+
+ ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
+ if (ret)
+ return ERR_PTR(ret);
+
+ old_action = fte->action.action;
+ fte->action.action |= flow_act->action;
+ handle = add_rule_fte(fte, fg, dest, dest_num,
+ old_action != flow_act->action);
+ if (IS_ERR(handle)) {
+ fte->action.action = old_action;
+ return handle;
+ }
+ trace_mlx5_fs_set_fte(fte, false);
+
+ for (i = 0; i < handle->num_rules; i++) {
+ if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ tree_add_node(&handle->rule[i]->node, &fte->node);
+ trace_mlx5_fs_add_rule(handle->rule[i]);
+ }
+ }
+ return handle;
+}
+
+static bool counter_is_valid(u32 action)
+{
+ return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
+}
+
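+/* Validate a destination against the requested action and the source flow
+ * table: a counter destination is only valid with drop/allow/fwd actions,
+ * and forwarding to another flow table must target a deeper level unless
+ * FLOW_ACT_IGNORE_FLOW_LEVEL is set (allowed only for FDB and NIC RX
+ * tables whose type matches the destination table).
+ */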
+static bool dest_is_valid(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_table *ft)
+{
+ bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL;
+ u32 action = flow_act->action;
+
+ if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
+ return counter_is_valid(action);
+
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return true;
+
+ if (ignore_level) {
+ if (ft->type != FS_FT_FDB &&
+ ft->type != FS_FT_NIC_RX)
+ return false;
+
+ if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ ft->type != dest->ft->type)
+ return false;
+ }
+
+ if (!dest || ((dest->type ==
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
+ (dest->ft->level <= ft->level && !ignore_level)))
+ return false;
+ return true;
+}
+
+struct match_list {
+ struct list_head list;
+ struct mlx5_flow_group *g;
+};
+
+static void free_match_list(struct match_list *head, bool ft_locked)
+{
+ struct match_list *iter, *match_tmp;
+
+ list_for_each_entry_safe(iter, match_tmp, &head->list,
+ list) {
+ tree_put_node(&iter->g->node, ft_locked);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+}
+
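+/* Collect, under RCU, all flow groups in the table whose match criteria
+ * match the spec (or just the given group, if one was provided), taking a
+ * tree reference on each. Allocations use GFP_ATOMIC since we are inside
+ * an RCU read-side critical section.
+ */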
+static int build_match_list(struct match_list *match_head,
+ struct mlx5_flow_table *ft,
+ const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_group *fg,
+ bool ft_locked)
+{
+ struct rhlist_head *tmp, *list;
+ struct mlx5_flow_group *g;
+ int err = 0;
+
+ rcu_read_lock();
+ INIT_LIST_HEAD(&match_head->list);
+ /* Collect all fgs which have a matching match_criteria */
+ list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
+ /* RCU is atomic, we can't execute FW commands here */
+ rhl_for_each_entry_rcu(g, tmp, list, hash) {
+ struct match_list *curr_match;
+
+ if (fg && fg != g)
+ continue;
+
+ if (unlikely(!tree_get_node(&g->node)))
+ continue;
+
+ curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
+ if (!curr_match) {
+ rcu_read_unlock();
+ free_match_list(match_head, ft_locked);
+ return -ENOMEM;
+ }
+ curr_match->g = g;
+ list_add_tail(&curr_match->list, &match_head->list);
+ }
+ rcu_read_unlock();
+ return err;
+}
+
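+/* Sum the version counters of all matched groups; a change in the sum
+ * indicates that one of the groups was modified concurrently.
+ */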
+static u64 matched_fgs_get_version(struct list_head *match_head)
+{
+ struct match_list *iter;
+ u64 version = 0;
+
+ list_for_each_entry(iter, match_head, list)
+ version += (u64)atomic_read(&iter->g->node.version);
+ return version;
+}
+
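+/* Look up an active FTE with the given match value inside a group. The
+ * group lock is taken (read or write, per take_write) only for the lookup;
+ * on success the FTE is returned referenced and write-locked.
+ */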
+static struct fs_fte *
+lookup_fte_locked(struct mlx5_flow_group *g,
+ const u32 *match_value,
+ bool take_write)
+{
+ struct fs_fte *fte_tmp;
+
+ if (take_write)
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ else
+ nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
+ fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
+ rhash_fte);
+ if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+ fte_tmp = NULL;
+ goto out;
+ }
+ if (!fte_tmp->node.active) {
+ tree_put_node(&fte_tmp->node, false);
+ fte_tmp = NULL;
+ goto out;
+ }
+
+ nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+out:
+ if (take_write)
+ up_write_ref_node(&g->node, false);
+ else
+ up_read_ref_node(&g->node);
+ return fte_tmp;
+}
+
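+/* Try to attach the rule to one of the matched flow groups: first look for
+ * an existing FTE with the same match value (unless FLOW_ACT_NO_APPEND is
+ * set), then try to insert a new FTE into any matched group. Returns
+ * -EAGAIN if the table version changed underneath us, or -ENOENT if no
+ * group could accommodate the FTE.
+ */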
+static struct mlx5_flow_handle *
+try_add_to_existing_fg(struct mlx5_flow_table *ft,
+ struct list_head *match_head,
+ const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ int ft_version)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *g;
+ struct mlx5_flow_handle *rule;
+ struct match_list *iter;
+ bool take_write = false;
+ struct fs_fte *fte;
+ u64 version = 0;
+ int err;
+
+ fte = alloc_fte(ft, spec, flow_act);
+ if (IS_ERR(fte))
+ return ERR_PTR(-ENOMEM);
+
+search_again_locked:
+ if (flow_act->flags & FLOW_ACT_NO_APPEND)
+ goto skip_search;
+ version = matched_fgs_get_version(match_head);
+ /* Try to find an fte with an identical match value and attempt to
+ * update its action.
+ */
+ list_for_each_entry(iter, match_head, list) {
+ struct fs_fte *fte_tmp;
+
+ g = iter->g;
+ fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
+ if (!fte_tmp)
+ continue;
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
+ /* No error check needed here, because insert_fte() is not called */
+ up_write_ref_node(&fte_tmp->node, false);
+ tree_put_node(&fte_tmp->node, false);
+ kmem_cache_free(steering->ftes_cache, fte);
+ return rule;
+ }
+
+skip_search:
+ /* No group with matching fte found, or we skipped the search.
+ * Try to add a new fte to any matching fg.
+ */
+
+ /* Check the ft version, in case a new flow group was added
+ * while the fgs weren't locked.
+ */
+ if (atomic_read(&ft->node.version) != ft_version) {
+ rule = ERR_PTR(-EAGAIN);
+ goto out;
+ }
+
+ /* Check the fgs version. If the version has changed, it could be
+ * that an FTE with the same match value was added while the fgs
+ * weren't locked.
+ */
+ if (!(flow_act->flags & FLOW_ACT_NO_APPEND) &&
+ version != matched_fgs_get_version(match_head)) {
+ take_write = true;
+ goto search_again_locked;
+ }
+
+ list_for_each_entry(iter, match_head, list) {
+ g = iter->g;
+
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+
+ if (!g->node.active) {
+ up_write_ref_node(&g->node, false);
+ continue;
+ }
+
+ err = insert_fte(g, fte);
+ if (err) {
+ up_write_ref_node(&g->node, false);
+ if (err == -ENOSPC)
+ continue;
+ kmem_cache_free(steering->ftes_cache, fte);
+ return ERR_PTR(err);
+ }
+
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ up_write_ref_node(&g->node, false);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
+ up_write_ref_node(&fte->node, false);
+ if (IS_ERR(rule))
+ tree_put_node(&fte->node, false);
+ return rule;
+ }
+ rule = ERR_PTR(-ENOENT);
+out:
+ kmem_cache_free(steering->ftes_cache, fte);
+ return rule;
+}
+
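+/* Core add-rule path: validate the spec and destinations, try to add the
+ * rule to an existing matching flow group/FTE, and otherwise fall back to
+ * creating an autogrouped flow group and a fresh FTE under the table write
+ * lock, retrying when the table version changes.
+ */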
+static struct mlx5_flow_handle *
+_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+ const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num)
+
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_handle *rule;
+ struct match_list match_head;
+ struct mlx5_flow_group *g;
+ bool take_write = false;
+ struct fs_fte *fte;
+ int version;
+ int err;
+ int i;
+
+ if (!check_valid_spec(spec))
+ return ERR_PTR(-EINVAL);
+
+ if (flow_act->fg && ft->autogroup.active)
+ return ERR_PTR(-EINVAL);
+
+ for (i = 0; i < dest_num; i++) {
+ if (!dest_is_valid(&dest[i], flow_act, ft))
+ return ERR_PTR(-EINVAL);
+ }
+ nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+search_again_locked:
+ version = atomic_read(&ft->node.version);
+
+ /* Collect all fgs which have a matching match_criteria */
+ err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write);
+ if (err) {
+ if (take_write)
+ up_write_ref_node(&ft->node, false);
+ else
+ up_read_ref_node(&ft->node);
+ return ERR_PTR(err);
+ }
+
+ if (!take_write)
+ up_read_ref_node(&ft->node);
+
+ rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
+ dest_num, version);
+ free_match_list(&match_head, take_write);
+ if (!IS_ERR(rule) ||
+ (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
+ if (take_write)
+ up_write_ref_node(&ft->node, false);
+ return rule;
+ }
+
+ if (!take_write) {
+ nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+ take_write = true;
+ }
+
+ if (PTR_ERR(rule) == -EAGAIN ||
+ version != atomic_read(&ft->node.version))
+ goto search_again_locked;
+
+ g = alloc_auto_flow_group(ft, spec);
+ if (IS_ERR(g)) {
+ rule = ERR_CAST(g);
+ up_write_ref_node(&ft->node, false);
+ return rule;
+ }
+
+ fte = alloc_fte(ft, spec, flow_act);
+ if (IS_ERR(fte)) {
+ up_write_ref_node(&ft->node, false);
+ err = PTR_ERR(fte);
+ goto err_alloc_fte;
+ }
+
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ up_write_ref_node(&ft->node, false);
+
+ err = create_auto_flow_group(ft, g);
+ if (err)
+ goto err_release_fg;
+
+ err = insert_fte(g, fte);
+ if (err)
+ goto err_release_fg;
+
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ up_write_ref_node(&g->node, false);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
+ up_write_ref_node(&fte->node, false);
+ if (IS_ERR(rule))
+ tree_put_node(&fte->node, false);
+ tree_put_node(&g->node, false);
+ return rule;
+
+err_release_fg:
+ up_write_ref_node(&g->node, false);
+ kmem_cache_free(steering->ftes_cache, fte);
+err_alloc_fte:
+ tree_put_node(&g->node, false);
+ return ERR_PTR(err);
+}
+
+static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
+{
+ return ((ft->type == FS_FT_NIC_RX) &&
+ (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
+}
+
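+/* Public entry point for adding flow rules. FWD_NEXT_PRIO/FWD_NEXT_NS
+ * actions are translated, under the root chain_lock, into a regular
+ * forward to the next flow table, which is appended as an extra
+ * destination.
+ */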
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+ const struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int num_dest)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ static const struct mlx5_flow_spec zero_spec = {};
+ struct mlx5_flow_destination *gen_dest = NULL;
+ struct mlx5_flow_table *next_ft = NULL;
+ struct mlx5_flow_handle *handle = NULL;
+ u32 sw_action = flow_act->action;
+ int i;
+
+ if (!spec)
+ spec = &zero_spec;
+
+ if (!is_fwd_next_action(sw_action))
+ return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+
+ if (!fwd_next_prio_supported(ft))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&root->chain_lock);
+ next_ft = find_next_fwd_ft(ft, flow_act);
+ if (!next_ft) {
+ handle = ERR_PTR(-EOPNOTSUPP);
+ goto unlock;
+ }
+
+ gen_dest = kcalloc(num_dest + 1, sizeof(*dest),
+ GFP_KERNEL);
+ if (!gen_dest) {
+ handle = ERR_PTR(-ENOMEM);
+ goto unlock;
+ }
+ for (i = 0; i < num_dest; i++)
+ gen_dest[i] = dest[i];
+ gen_dest[i].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ gen_dest[i].ft = next_ft;
+ dest = gen_dest;
+ num_dest++;
+ flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+ if (IS_ERR(handle))
+ goto unlock;
+
+ if (list_empty(&handle->rule[num_dest - 1]->next_ft)) {
+ mutex_lock(&next_ft->lock);
+ list_add(&handle->rule[num_dest - 1]->next_ft,
+ &next_ft->fwd_rules);
+ mutex_unlock(&next_ft->lock);
+ handle->rule[num_dest - 1]->sw_action = sw_action;
+ handle->rule[num_dest - 1]->ft = ft;
+ }
+unlock:
+ mutex_unlock(&root->chain_lock);
+ kfree(gen_dest);
+ return handle;
+}
+EXPORT_SYMBOL(mlx5_add_flow_rules);
+
+void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
+{
+ struct fs_fte *fte;
+ int i;
+
+ /* In order to consolidate the HW changes we lock the FTE against
+ * other changes and increase its refcount, so that the FTE's "del"
+ * functions are not invoked implicitly; they are handled here instead.
+ * The rules are removed while the FTE is locked.
+ * After removing all of the handle's rules, if rules remain it means
+ * we only need to modify the FTE in FW, then unlock and drop the
+ * refcount we took above.
+ * Otherwise the FTE should be deleted: first delete the FTE in FW,
+ * then unlock the FTE and proceed with tree_put_node of the FTE,
+ * which performs the final refcount decrease as well as the required
+ * handling of its parent.
+ */
+ fs_get_obj(fte, handle->rule[0]->node.parent);
+ down_write_ref_node(&fte->node, false);
+ for (i = handle->num_rules - 1; i >= 0; i--)
+ tree_remove_node(&handle->rule[i]->node, true);
+ if (list_empty(&fte->node.children)) {
+ fte->node.del_hw_func(&fte->node);
+ /* Avoid double call to del_hw_fte */
+ fte->node.del_hw_func = NULL;
+ up_write_ref_node(&fte->node, false);
+ tree_put_node(&fte->node, false);
+ } else if (fte->dests_size) {
+ if (fte->modify_mask)
+ modify_fte(fte);
+ up_write_ref_node(&fte->node, false);
+ } else {
+ up_write_ref_node(&fte->node, false);
+ }
+ kfree(handle);
+}
+EXPORT_SYMBOL(mlx5_del_flow_rules);
+
+/* Assuming prio->node.children (flow tables) is sorted by level */
+static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
+{
+ struct fs_node *prio_parent, *child;
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+
+ if (!list_is_last(&ft->node.list, &prio->node.children))
+ return list_next_entry(ft, node.list);
+
+ prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+ if (prio_parent && list_is_first(&child->list, &prio_parent->children))
+ return find_closest_ft(&prio->node, false, false);
+
+ return find_next_chained_ft(&prio->node);
+}
+
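+/* If the table being destroyed is the root flow table of its namespace,
+ * promote the next flow table to root and update the FW root pointer
+ * (once per underlay QPN when any are registered).
+ */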
+static int update_root_ft_destroy(struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_ft_underlay_qp *uqp;
+ struct mlx5_flow_table *new_root_ft = NULL;
+ int err = 0;
+ u32 qpn;
+
+ if (root->root_ft != ft)
+ return 0;
+
+ new_root_ft = find_next_ft(ft);
+ if (!new_root_ft) {
+ root->root_ft = NULL;
+ return 0;
+ }
+
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case the QPN list is empty */
+ qpn = 0;
+ err = root->cmds->update_root_ft(root, new_root_ft,
+ qpn, false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = root->cmds->update_root_ft(root,
+ new_root_ft, qpn,
+ false);
+ if (err)
+ break;
+ }
+ }
+
+ if (err)
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
+ else
+ root->root_ft = new_root_ft;
+
+ return 0;
+}
+
+/* Connect the flow tables of the previous priority to
+ * the next flow table.
+ */
+static int disconnect_flow_table(struct mlx5_flow_table *ft)
+{
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ struct mlx5_flow_table *next_ft;
+ struct fs_prio *prio;
+ int err = 0;
+
+ err = update_root_ft_destroy(ft);
+ if (err)
+ return err;
+
+ fs_get_obj(prio, ft->node.parent);
+ if (!(list_first_entry(&prio->node.children,
+ struct mlx5_flow_table,
+ node.list) == ft))
+ return 0;
+
+ next_ft = find_next_ft(ft);
+ err = connect_fwd_rules(dev, next_ft, ft);
+ if (err)
+ return err;
+
+ err = connect_prev_fts(dev, next_ft, prio);
+ if (err)
+ mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
+ ft->id);
+ return err;
+}
+
+int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED))
+ err = disconnect_flow_table(ft);
+ if (err) {
+ mutex_unlock(&root->chain_lock);
+ return err;
+ }
+ if (tree_remove_node(&ft->node, false))
+ mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
+ ft->id);
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_destroy_flow_table);
+
+void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
+{
+ if (tree_remove_node(&fg->node, false))
+ mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
+ fg->id);
+}
+EXPORT_SYMBOL(mlx5_destroy_flow_group);
+
+struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
+ int n)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ if (!steering || !steering->fdb_sub_ns)
+ return NULL;
+
+ return steering->fdb_sub_ns[n];
+}
+EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
+
+static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type)
+{
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC:
+ case MLX5_FLOW_NAMESPACE_LAG:
+ case MLX5_FLOW_NAMESPACE_OFFLOADS:
+ case MLX5_FLOW_NAMESPACE_ETHTOOL:
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+ case MLX5_FLOW_NAMESPACE_ANCHOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ struct mlx5_flow_root_namespace *root_ns;
+ int prio = 0;
+ struct fs_prio *fs_prio;
+ struct mlx5_flow_namespace *ns;
+
+ if (!steering)
+ return NULL;
+
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_FDB:
+ if (steering->fdb_root_ns)
+ return &steering->fdb_root_ns->ns;
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_PORT_SEL:
+ if (steering->port_sel_root_ns)
+ return &steering->port_sel_root_ns->ns;
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
+ if (steering->sniffer_rx_root_ns)
+ return &steering->sniffer_rx_root_ns->ns;
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
+ if (steering->sniffer_tx_root_ns)
+ return &steering->sniffer_tx_root_ns->ns;
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ root_ns = steering->fdb_root_ns;
+ prio = FDB_BYPASS_PATH;
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC:
+ case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC:
+ root_ns = steering->egress_root_ns;
+ prio = type - MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
+ root_ns = steering->rdma_rx_root_ns;
+ prio = RDMA_RX_BYPASS_PRIO;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL:
+ root_ns = steering->rdma_rx_root_ns;
+ prio = RDMA_RX_KERNEL_PRIO;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ root_ns = steering->rdma_tx_root_ns;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS:
+ root_ns = steering->rdma_rx_root_ns;
+ prio = RDMA_RX_COUNTERS_PRIO;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS:
+ root_ns = steering->rdma_tx_root_ns;
+ prio = RDMA_TX_COUNTERS_PRIO;
+ break;
+ default: /* Must be NIC RX */
+ WARN_ON(!is_nic_rx_ns(type));
+ root_ns = steering->root_ns;
+ prio = type;
+ break;
+ }
+
+ if (!root_ns)
+ return NULL;
+
+ fs_prio = find_prio(&root_ns->ns, prio);
+ if (!fs_prio)
+ return NULL;
+
+ ns = list_first_entry(&fs_prio->node.children,
+ typeof(*ns),
+ node.list);
+
+ return ns;
+}
+EXPORT_SYMBOL(mlx5_get_flow_namespace);
+
+struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ int vport)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ if (!steering)
+ return NULL;
+
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
+ if (vport >= steering->esw_egress_acl_vports)
+ return NULL;
+ if (steering->esw_egress_root_ns &&
+ steering->esw_egress_root_ns[vport])
+ return &steering->esw_egress_root_ns[vport]->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ if (vport >= steering->esw_ingress_acl_vports)
+ return NULL;
+ if (steering->esw_ingress_root_ns &&
+ steering->esw_ingress_root_ns[vport])
+ return &steering->esw_ingress_root_ns[vport]->ns;
+ else
+ return NULL;
+ default:
+ return NULL;
+ }
+}
+
+static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio,
+ int num_levels,
+ enum fs_node_type type)
+{
+ struct fs_prio *fs_prio;
+
+ fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
+ if (!fs_prio)
+ return ERR_PTR(-ENOMEM);
+
+ fs_prio->node.type = type;
+ tree_init_node(&fs_prio->node, NULL, del_sw_prio);
+ tree_add_node(&fs_prio->node, &ns->node);
+ fs_prio->num_levels = num_levels;
+ fs_prio->prio = prio;
+ list_add_tail(&fs_prio->node.list, &ns->node.children);
+
+ return fs_prio;
+}
+
+static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
+ unsigned int prio,
+ int num_levels)
+{
+ return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
+}
+
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio, int num_levels)
+{
+ return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
+}
+
+static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
+ *ns)
+{
+ ns->node.type = FS_TYPE_NAMESPACE;
+
+ return ns;
+}
+
+static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
+ int def_miss_act)
+{
+ struct mlx5_flow_namespace *ns;
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns)
+ return ERR_PTR(-ENOMEM);
+
+ fs_init_namespace(ns);
+ ns->def_miss_action = def_miss_act;
+ tree_init_node(&ns->node, NULL, del_sw_ns);
+ tree_add_node(&ns->node, &prio->node);
+ list_add_tail(&ns->node.list, &prio->node.children);
+
+ return ns;
+}
+
+static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
+ struct init_tree_node *prio_metadata)
+{
+ struct fs_prio *fs_prio;
+ int i;
+
+ for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
+ fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
+ if (IS_ERR(fs_prio))
+ return PTR_ERR(fs_prio);
+ }
+ return 0;
+}
+
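+/* GET_FLOW_TABLE_CAP() extracts a single capability bit at the given bit
+ * offset from the raw FLOW_TABLE HCA capability area; has_required_caps()
+ * checks that all bits listed in the node's caps array are set.
+ */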
+#define FLOW_TABLE_BIT_SZ 1
+#define GET_FLOW_TABLE_CAP(dev, offset) \
+ ((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) + \
+ offset / 32)) >> \
+ (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
+static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
+{
+ int i;
+
+ for (i = 0; i < caps->arr_sz; i++) {
+ if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
+ return false;
+ }
+ return true;
+}
+
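+/* Recursively build the priorities and namespaces described by the static
+ * init_tree_node tables, skipping priorities whose required flow table
+ * level or capabilities are not supported by the device.
+ */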
+static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
+ struct init_tree_node *init_node,
+ struct fs_node *fs_parent_node,
+ struct init_tree_node *init_parent_node,
+ int prio)
+{
+ int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
+ flow_table_properties_nic_receive.
+ max_ft_level);
+ struct mlx5_flow_namespace *fs_ns;
+ struct fs_prio *fs_prio;
+ struct fs_node *base;
+ int i;
+ int err;
+
+ if (init_node->type == FS_TYPE_PRIO) {
+ if ((init_node->min_ft_level > max_ft_level) ||
+ !has_required_caps(steering->dev, &init_node->caps))
+ return 0;
+
+ fs_get_obj(fs_ns, fs_parent_node);
+ if (init_node->num_leaf_prios)
+ return create_leaf_prios(fs_ns, prio, init_node);
+ fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
+ if (IS_ERR(fs_prio))
+ return PTR_ERR(fs_prio);
+ base = &fs_prio->node;
+ } else if (init_node->type == FS_TYPE_NAMESPACE) {
+ fs_get_obj(fs_prio, fs_parent_node);
+ fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action);
+ if (IS_ERR(fs_ns))
+ return PTR_ERR(fs_ns);
+ base = &fs_ns->node;
+ } else {
+ return -EINVAL;
+ }
+ prio = 0;
+ for (i = 0; i < init_node->ar_size; i++) {
+ err = init_root_tree_recursive(steering, &init_node->children[i],
+ base, init_node, prio);
+ if (err)
+ return err;
+ if (init_node->children[i].type == FS_TYPE_PRIO &&
+ init_node->children[i].num_leaf_prios) {
+ prio += init_node->children[i].num_leaf_prios;
+ }
+ }
+
+ return 0;
+}
+
+static int init_root_tree(struct mlx5_flow_steering *steering,
+ struct init_tree_node *init_node,
+ struct fs_node *fs_parent_node)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < init_node->ar_size; i++) {
+ err = init_root_tree_recursive(steering, &init_node->children[i],
+ fs_parent_node,
+ init_node, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static void del_sw_root_ns(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_namespace *ns;
+
+ fs_get_obj(ns, node);
+ root_ns = container_of(ns, struct mlx5_flow_root_namespace, ns);
+ mutex_destroy(&root_ns->chain_lock);
+ kfree(node);
+}
+
+static struct mlx5_flow_root_namespace
+*create_root_ns(struct mlx5_flow_steering *steering,
+ enum fs_flow_table_type table_type)
+{
+ const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_namespace *ns;
+
+ /* Create the root namespace */
+ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
+ if (!root_ns)
+ return NULL;
+
+ root_ns->dev = steering->dev;
+ root_ns->table_type = table_type;
+ root_ns->cmds = cmds;
+
+ INIT_LIST_HEAD(&root_ns->underlay_qpns);
+
+ ns = &root_ns->ns;
+ fs_init_namespace(ns);
+ mutex_init(&root_ns->chain_lock);
+ tree_init_node(&ns->node, NULL, del_sw_root_ns);
+ tree_add_node(&ns->node, NULL);
+
+ return root_ns;
+}
+
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
+
+static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
+{
+ struct fs_prio *prio;
+
+ fs_for_each_prio(prio, ns) {
+ /* This updates prio start_level and num_levels */
+ set_prio_attrs_in_prio(prio, acc_level);
+ acc_level += prio->num_levels;
+ }
+ return acc_level;
+}
+
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
+{
+ struct mlx5_flow_namespace *ns;
+ int acc_level_ns = acc_level;
+
+ prio->start_level = acc_level;
+ fs_for_each_ns(ns, prio) {
+ /* This updates start_level and num_levels of ns's priority descendants */
+ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
+
+ /* If this is a prio with chains, we can jump from one chain
+ * (namespace) to another, so we accumulate the levels.
+ */
+ if (prio->node.type == FS_TYPE_PRIO_CHAINS)
+ acc_level = acc_level_ns;
+ }
+
+ if (!prio->num_levels)
+ prio->num_levels = acc_level_ns - prio->start_level;
+ WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
+}
+
+static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
+{
+ struct mlx5_flow_namespace *ns = &root_ns->ns;
+ struct fs_prio *prio;
+ int start_level = 0;
+
+ fs_for_each_prio(prio, ns) {
+ set_prio_attrs_in_prio(prio, start_level);
+ start_level += prio->num_levels;
+ }
+}
+
+#define ANCHOR_PRIO 0
+#define ANCHOR_SIZE 1
+#define ANCHOR_LEVEL 0
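+/* Create a single-entry flow table in the ANCHOR namespace, the last
+ * priority of the NIC RX root namespace, giving preceding tables a final
+ * table to chain to.
+ */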
+static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
+ if (WARN_ON(!ns))
+ return -EINVAL;
+
+ ft_attr.max_fte = ANCHOR_SIZE;
+ ft_attr.level = ANCHOR_LEVEL;
+ ft_attr.prio = ANCHOR_PRIO;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
+ return PTR_ERR(ft);
+ }
+ return 0;
+}
+
+static int init_root_ns(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
+ if (!steering->root_ns)
+ return -ENOMEM;
+
+ err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
+ if (err)
+ goto out_err;
+
+ set_prio_attrs(steering->root_ns);
+ err = create_anchor_flow_table(steering);
+ if (err)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->root_ns);
+ steering->root_ns = NULL;
+ return err;
+}
+
+static void clean_tree(struct fs_node *node)
+{
+ if (node) {
+ struct fs_node *iter;
+ struct fs_node *temp;
+
+ tree_get_node(node);
+ list_for_each_entry_safe(iter, temp, &node->children, list)
+ clean_tree(iter);
+ tree_put_node(node, false);
+ tree_remove_node(node, false);
+ }
+}
+
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
+{
+ if (!root_ns)
+ return;
+
+ clean_tree(&root_ns->ns.node);
+}
+
+static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
+ if (!steering->sniffer_tx_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
+ if (!steering->sniffer_rx_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+#define PORT_SEL_NUM_LEVELS 3
+static int init_port_sel_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL);
+ if (!steering->port_sel_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->port_sel_root_ns->ns, 0,
+ PORT_SEL_NUM_LEVELS);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX);
+ if (!steering->rdma_rx_root_ns)
+ return -ENOMEM;
+
+ err = init_root_tree(steering, &rdma_rx_root_fs,
+ &steering->rdma_rx_root_ns->ns.node);
+ if (err)
+ goto out_err;
+
+ set_prio_attrs(steering->rdma_rx_root_ns);
+
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->rdma_rx_root_ns);
+ steering->rdma_rx_root_ns = NULL;
+ return err;
+}
+
+static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->rdma_tx_root_ns = create_root_ns(steering, FS_FT_RDMA_TX);
+ if (!steering->rdma_tx_root_ns)
+ return -ENOMEM;
+
+ err = init_root_tree(steering, &rdma_tx_root_fs,
+ &steering->rdma_tx_root_ns->ns.node);
+ if (err)
+ goto out_err;
+
+ set_prio_attrs(steering->rdma_tx_root_ns);
+
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->rdma_tx_root_ns);
+ steering->rdma_tx_root_ns = NULL;
+ return err;
+}
+
+/* FT and TC chains are stored in the same array so we can reuse
+ * mlx5_get_fdb_sub_ns() and the TC API for FT chains.
+ * When creating a new ns for each chain, store it in the first available slot.
+ * Assume TC chains are created and stored first, and only then the FT chain.
+ */
+static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
+ struct mlx5_flow_namespace *ns)
+{
+ int chain = 0;
+
+ while (steering->fdb_sub_ns[chain])
+ ++chain;
+
+ steering->fdb_sub_ns[chain] = ns;
+}
+
+static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
+ struct fs_prio *maj_prio)
+{
+ struct mlx5_flow_namespace *ns;
+ struct fs_prio *min_prio;
+ int prio;
+
+ ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+
+ for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) {
+ min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO);
+ if (IS_ERR(min_prio))
+ return PTR_ERR(min_prio);
+ }
+
+ store_fdb_sub_ns_prio_chain(steering, ns);
+
+ return 0;
+}
+
+static int create_fdb_chains(struct mlx5_flow_steering *steering,
+ int fs_prio,
+ int chains)
+{
+ struct fs_prio *maj_prio;
+ int levels;
+ int chain;
+ int err;
+
+ levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains;
+ maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
+ fs_prio,
+ levels);
+ if (IS_ERR(maj_prio))
+ return PTR_ERR(maj_prio);
+
+ for (chain = 0; chain < chains; chain++) {
+ err = create_fdb_sub_ns_prio_chain(steering, maj_prio);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int create_fdb_fast_path(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS,
+ sizeof(*steering->fdb_sub_ns),
+ GFP_KERNEL);
+ if (!steering->fdb_sub_ns)
+ return -ENOMEM;
+
+ err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1);
+ if (err)
+ return err;
+
+ err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int create_fdb_bypass(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_flow_namespace *ns;
+ struct fs_prio *prio;
+ int i;
+
+ prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+
+ for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) {
+ prio = fs_create_prio(ns, i, 1);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+ }
+ return 0;
+}
+
+static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
+}
+
+static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *maj_prio;
+ int err;
+
+ steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
+ if (!steering->fdb_root_ns)
+ return -ENOMEM;
+
+ err = create_fdb_bypass(steering);
+ if (err)
+ goto out_err;
+
+ err = create_fdb_fast_path(steering);
+ if (err)
+ goto out_err;
+
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_TC_MISS, 1);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
+ goto out_err;
+ }
+
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
+ goto out_err;
+ }
+
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
+ goto out_err;
+ }
+
+ /* We put this priority last, knowing that nothing will get here
+ * unless explicitly forwarded to. This is possible because the
+ * slow path tables have catch-all rules and nothing gets past
+ * those tables.
+ */
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
+ goto out_err;
+ }
+
+ set_prio_attrs(steering->fdb_root_ns);
+ return 0;
+
+out_err:
+ cleanup_fdb_root_ns(steering);
+ return err;
+}
+
+static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+{
+ struct fs_prio *prio;
+
+ steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
+ if (!steering->esw_egress_root_ns[vport])
+ return -ENOMEM;
+
+ /* Create a single prio */
+ prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+{
+ struct fs_prio *prio;
+
+ steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
+ if (!steering->esw_ingress_root_ns[vport])
+ return -ENOMEM;
+
+ /* Create a single prio */
+ prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err;
+ int i;
+
+ steering->esw_egress_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->esw_egress_root_ns),
+ GFP_KERNEL);
+ if (!steering->esw_egress_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < total_vports; i++) {
+ err = init_egress_acl_root_ns(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+ steering->esw_egress_acl_vports = total_vports;
+ return 0;
+
+cleanup_root_ns:
+ for (i--; i >= 0; i--)
+ cleanup_root_ns(steering->esw_egress_root_ns[i]);
+ kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
+ return err;
+}
+
+void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_egress_root_ns)
+ return;
+
+ for (i = 0; i < steering->esw_egress_acl_vports; i++)
+ cleanup_root_ns(steering->esw_egress_root_ns[i]);
+
+ kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
+}
+
+int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err;
+ int i;
+
+ steering->esw_ingress_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->esw_ingress_root_ns),
+ GFP_KERNEL);
+ if (!steering->esw_ingress_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < total_vports; i++) {
+ err = init_ingress_acl_root_ns(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+ steering->esw_ingress_acl_vports = total_vports;
+ return 0;
+
+cleanup_root_ns:
+ for (i--; i >= 0; i--)
+ cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+ kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
+ return err;
+}
+
+void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_ingress_root_ns)
+ return;
+
+ for (i = 0; i < steering->esw_ingress_acl_vports; i++)
+ cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+
+ kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
+}
+
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ ns = mlx5_get_flow_namespace(dev, type);
+ if (!ns)
+ return 0;
+
+ root = find_root(&ns->node);
+ if (!root)
+ return 0;
+
+ return root->cmds->get_capabilities(root, root->table_type);
+}
+
+static int init_egress_root_ns(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->egress_root_ns = create_root_ns(steering,
+ FS_FT_NIC_TX);
+ if (!steering->egress_root_ns)
+ return -ENOMEM;
+
+ err = init_root_tree(steering, &egress_root_fs,
+ &steering->egress_root_ns->ns.node);
+ if (err)
+ goto cleanup;
+ set_prio_attrs(steering->egress_root_ns);
+ return 0;
+cleanup:
+ cleanup_root_ns(steering->egress_root_ns);
+ steering->egress_root_ns = NULL;
+ return err;
+}
+
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ cleanup_root_ns(steering->root_ns);
+ cleanup_fdb_root_ns(steering);
+ cleanup_root_ns(steering->port_sel_root_ns);
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->rdma_rx_root_ns);
+ cleanup_root_ns(steering->rdma_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
+}
+
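+/* Create the per-domain root namespaces (NIC RX/TX, FDB, sniffer, port
+ * selection, RDMA RX/TX), each only when the corresponding flow table
+ * support is reported in the device capabilities.
+ */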
+int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err = 0;
+
+ if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ (MLX5_CAP_GEN(dev, nic_flow_table))) ||
+ ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
+ MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
+ err = init_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_ESWITCH_MANAGER(dev)) {
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
+ err = init_fdb_root_ns(steering);
+ if (err)
+ goto err;
+ }
+ }
+
+ if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
+ err = init_sniffer_rx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
+ err = init_sniffer_tx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) {
+ err = init_port_sel_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
+ MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
+ err = init_rdma_rx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_RDMA_TX(dev, ft_support)) {
+ err = init_rdma_tx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
+ err = init_egress_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ mlx5_fs_core_cleanup(dev);
+ return err;
+}
+
+void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ kmem_cache_destroy(steering->ftes_cache);
+ kmem_cache_destroy(steering->fgs_cache);
+ kfree(steering);
+ mlx5_ft_pool_destroy(dev);
+ mlx5_cleanup_fc_stats(dev);
+}
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering;
+ int err = 0;
+
+ err = mlx5_init_fc_stats(dev);
+ if (err)
+ return err;
+
+ err = mlx5_ft_pool_init(dev);
+ if (err)
+ goto err;
+
+ steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+ if (!steering) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ steering->dev = dev;
+ dev->priv.steering = steering;
+
+ if (mlx5_fs_dr_is_supported(dev))
+ steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+ else
+ steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+
+ steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+ sizeof(struct mlx5_flow_group), 0,
+ 0, NULL);
+ steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+ 0, NULL);
+ if (!steering->ftes_cache || !steering->fgs_cache) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+
+err:
+ mlx5_fs_core_free(dev);
+ return err;
+}
+
+int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+ struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *new_uqp;
+ int err = 0;
+
+ new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
+ if (!new_uqp)
+ return -ENOMEM;
+
+ mutex_lock(&root->chain_lock);
+
+ if (!root->root_ft) {
+ err = -EINVAL;
+ goto update_ft_fail;
+ }
+
+ err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
+ false);
+ if (err) {
+ mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
+ underlay_qpn, err);
+ goto update_ft_fail;
+ }
+
+ new_uqp->qpn = underlay_qpn;
+ list_add_tail(&new_uqp->list, &root->underlay_qpns);
+
+ mutex_unlock(&root->chain_lock);
+
+ return 0;
+
+update_ft_fail:
+ mutex_unlock(&root->chain_lock);
+ kfree(new_uqp);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
+
+int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+ struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *uqp;
+ bool found = false;
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ if (uqp->qpn == underlay_qpn) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
+ underlay_qpn);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
+ true);
+ if (err)
+ mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
+ underlay_qpn, err);
+
+ list_del(&uqp->list);
+ mutex_unlock(&root->chain_lock);
+ kfree(uqp);
+
+ return 0;
+
+out:
+ mutex_unlock(&root->chain_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
+
+static struct mlx5_flow_root_namespace
+*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_flow_namespace *ns;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
+ ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
+ ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
+ else
+ ns = mlx5_get_flow_namespace(dev, ns_type);
+ if (!ns)
+ return NULL;
+
+ return find_root(&ns->node);
+}
+
+struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 ns_type, u8 num_actions,
+ void *modify_actions)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ root = get_root_namespace(dev, ns_type);
+ if (!root)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
+ if (!modify_hdr)
+ return ERR_PTR(-ENOMEM);
+
+ modify_hdr->ns_type = ns_type;
+ err = root->cmds->modify_header_alloc(root, ns_type, num_actions,
+ modify_actions, modify_hdr);
+ if (err) {
+ kfree(modify_hdr);
+ return ERR_PTR(err);
+ }
+
+ return modify_hdr;
+}
+EXPORT_SYMBOL(mlx5_modify_header_alloc);
+
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ struct mlx5_flow_root_namespace *root;
+
+ root = get_root_namespace(dev, modify_hdr->ns_type);
+ if (WARN_ON(!root))
+ return;
+ root->cmds->modify_header_dealloc(root, modify_hdr);
+ kfree(modify_hdr);
+}
+EXPORT_SYMBOL(mlx5_modify_header_dealloc);
+
+struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+ struct mlx5_pkt_reformat_params *params,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_pkt_reformat *pkt_reformat;
+ struct mlx5_flow_root_namespace *root;
+ int err;
+
+ root = get_root_namespace(dev, ns_type);
+ if (!root)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
+ if (!pkt_reformat)
+ return ERR_PTR(-ENOMEM);
+
+ pkt_reformat->ns_type = ns_type;
+ pkt_reformat->reformat_type = params->type;
+ err = root->cmds->packet_reformat_alloc(root, params, ns_type,
+ pkt_reformat);
+ if (err) {
+ kfree(pkt_reformat);
+ return ERR_PTR(err);
+ }
+
+ return pkt_reformat;
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
+
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5_flow_root_namespace *root;
+
+ root = get_root_namespace(dev, pkt_reformat->ns_type);
+ if (WARN_ON(!root))
+ return;
+ root->cmds->packet_reformat_dealloc(root, pkt_reformat);
+ kfree(pkt_reformat);
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
+
+int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer)
+{
+ return definer->id;
+}
+
+struct mlx5_flow_definer *
+mlx5_create_match_definer(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type ns_type, u16 format_id,
+ u32 *match_mask)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_definer *definer;
+ int id;
+
+ root = get_root_namespace(dev, ns_type);
+ if (!root)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ definer = kzalloc(sizeof(*definer), GFP_KERNEL);
+ if (!definer)
+ return ERR_PTR(-ENOMEM);
+
+ definer->ns_type = ns_type;
+ id = root->cmds->create_match_definer(root, format_id, match_mask);
+ if (id < 0) {
+ mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id);
+ kfree(definer);
+ return ERR_PTR(id);
+ }
+ definer->id = id;
+ return definer;
+}
+
+void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
+ struct mlx5_flow_definer *definer)
+{
+ struct mlx5_flow_root_namespace *root;
+
+ root = get_root_namespace(dev, definer->ns_type);
+ if (WARN_ON(!root))
+ return;
+
+ root->cmds->destroy_match_definer(root, definer->id);
+ kfree(definer);
+}
+
+int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns)
+{
+ if (peer_ns && ns->mode != peer_ns->mode) {
+ mlx5_core_err(ns->dev,
+ "Can't peer namespace of different steering mode\n");
+ return -EINVAL;
+ }
+
+ return ns->cmds->set_peer(ns, peer_ns);
+}
+
+/* This function should be called only at the init stage of the namespace.
+ * It is not safe to call it while steering operations are being executed
+ * in the namespace.
+ */
+int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
+ enum mlx5_flow_steering_mode mode)
+{
+ struct mlx5_flow_root_namespace *root;
+ const struct mlx5_flow_cmds *cmds;
+ int err;
+
+ root = find_root(&ns->node);
+ if (&root->ns != ns)
+ /* Can't set cmds on a non-root namespace */
+ return -EINVAL;
+
+ if (root->table_type != FS_FT_FDB)
+ return -EOPNOTSUPP;
+
+ if (root->mode == mode)
+ return 0;
+
+ if (mode == MLX5_FLOW_STEERING_MODE_SMFS)
+ cmds = mlx5_fs_cmd_get_dr_cmds();
+ else
+ cmds = mlx5_fs_cmd_get_fw_cmds();
+ if (!cmds)
+ return -EOPNOTSUPP;
+
+ err = cmds->create_ns(root);
+ if (err) {
+ mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n",
+ err);
+ return err;
+ }
+
+ root->cmds->destroy_ns(root);
+ root->cmds = cmds;
+ root->mode = mode;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
new file mode 100644
index 000000000..3af50fd04
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CORE_
+#define _MLX5_FS_CORE_
+
+#include <linux/refcount.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rhashtable.h>
+#include <linux/llist.h>
+#include <steering/fs_dr.h>
+
+#define FDB_TC_MAX_CHAIN 3
+#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1)
+#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1)
+
+/* The index of the last real chain (FT) + 1, since chain zero is valid as well */
+#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1)
+
+#define FDB_TC_MAX_PRIO 16
+#define FDB_TC_LEVELS_PER_PRIO 2
+
+struct mlx5_flow_definer {
+ enum mlx5_flow_namespace_type ns_type;
+ u32 id;
+};
+
+struct mlx5_modify_hdr {
+ enum mlx5_flow_namespace_type ns_type;
+ union {
+ struct mlx5_fs_dr_action action;
+ u32 id;
+ };
+};
+
+struct mlx5_pkt_reformat {
+ enum mlx5_flow_namespace_type ns_type;
+ int reformat_type; /* from mlx5_ifc */
+ union {
+ struct mlx5_fs_dr_action action;
+ u32 id;
+ };
+};
+
+/* FS_TYPE_PRIO_CHAINS is a PRIO that contains namespaces only, and those
+ * namespaces are parallel to one another when traversed to connect a new
+ * flow table. This means the last flow table in a TYPE_PRIO prio of one
+ * parallel namespace will not automatically connect to the first flow table
+ * found in any prio of the next namespace, but will instead skip the entire
+ * containing TYPE_PRIO_CHAINS prio.
+ *
+ * This is used to implement TC chains: each chain of prios is a different
+ * namespace inside a containing TYPE_PRIO_CHAINS prio.
+ */
+
+enum fs_node_type {
+ FS_TYPE_NAMESPACE,
+ FS_TYPE_PRIO,
+ FS_TYPE_PRIO_CHAINS,
+ FS_TYPE_FLOW_TABLE,
+ FS_TYPE_FLOW_GROUP,
+ FS_TYPE_FLOW_ENTRY,
+ FS_TYPE_FLOW_DEST
+};
+
+enum fs_flow_table_type {
+ FS_FT_NIC_RX = 0x0,
+ FS_FT_NIC_TX = 0x1,
+ FS_FT_ESW_EGRESS_ACL = 0x2,
+ FS_FT_ESW_INGRESS_ACL = 0x3,
+ FS_FT_FDB = 0X4,
+ FS_FT_SNIFFER_RX = 0X5,
+ FS_FT_SNIFFER_TX = 0X6,
+ FS_FT_RDMA_RX = 0X7,
+ FS_FT_RDMA_TX = 0X8,
+ FS_FT_PORT_SEL = 0X9,
+ FS_FT_MAX_TYPE = FS_FT_PORT_SEL,
+};
+
+enum fs_flow_table_op_mod {
+ FS_FT_OP_MOD_NORMAL,
+ FS_FT_OP_MOD_LAG_DEMUX,
+};
+
+enum fs_fte_status {
+ FS_FTE_STATUS_EXISTING = 1UL << 0,
+};
+
+enum mlx5_flow_steering_mode {
+ MLX5_FLOW_STEERING_MODE_DMFS,
+ MLX5_FLOW_STEERING_MODE_SMFS
+};
+
+enum mlx5_flow_steering_capabilty {
+ MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
+ MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
+};
+
+struct mlx5_flow_steering {
+ struct mlx5_core_dev *dev;
+ enum mlx5_flow_steering_mode mode;
+ struct kmem_cache *fgs_cache;
+ struct kmem_cache *ftes_cache;
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_root_namespace *fdb_root_ns;
+ struct mlx5_flow_namespace **fdb_sub_ns;
+ struct mlx5_flow_root_namespace **esw_egress_root_ns;
+ struct mlx5_flow_root_namespace **esw_ingress_root_ns;
+ struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
+ struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
+ struct mlx5_flow_root_namespace *rdma_rx_root_ns;
+ struct mlx5_flow_root_namespace *rdma_tx_root_ns;
+ struct mlx5_flow_root_namespace *egress_root_ns;
+ struct mlx5_flow_root_namespace *port_sel_root_ns;
+ int esw_egress_acl_vports;
+ int esw_ingress_acl_vports;
+};
+
+struct fs_node {
+ struct list_head list;
+ struct list_head children;
+ enum fs_node_type type;
+ struct fs_node *parent;
+ struct fs_node *root;
+ /* lock the node for writing and traversing */
+ struct rw_semaphore lock;
+ refcount_t refcount;
+ bool active;
+ void (*del_hw_func)(struct fs_node *);
+ void (*del_sw_func)(struct fs_node *);
+ atomic_t version;
+};
+
+struct mlx5_flow_rule {
+ struct fs_node node;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_destination dest_attr;
+ /* next_ft should be accessed under chain_lock and only if the
+ * destination type is FWD_NEXT_FT.
+ */
+ struct list_head next_ft;
+ u32 sw_action;
+};
+
+struct mlx5_flow_handle {
+ int num_rules;
+ struct mlx5_flow_rule *rule[];
+};
+
+/* Type of children is mlx5_flow_group */
+struct mlx5_flow_table {
+ struct fs_node node;
+ struct mlx5_fs_dr_table fs_dr_table;
+ u32 id;
+ u16 vport;
+ unsigned int max_fte;
+ unsigned int level;
+ enum fs_flow_table_type type;
+ enum fs_flow_table_op_mod op_mod;
+ struct {
+ bool active;
+ unsigned int required_groups;
+ unsigned int group_size;
+ unsigned int num_groups;
+ unsigned int max_fte;
+ } autogroup;
+ /* Protect fwd_rules */
+ struct mutex lock;
+ /* FWD rules that point to this flow table */
+ struct list_head fwd_rules;
+ u32 flags;
+ struct rhltable fgs_hash;
+ enum mlx5_flow_table_miss_action def_miss_action;
+ struct mlx5_flow_namespace *ns;
+};
+
+struct mlx5_ft_underlay_qp {
+ struct list_head list;
+ u32 qpn;
+};
+
+#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_e00
+/* Calculate the fte_match_param length without the reserved length.
+ * Make sure the reserved field is the last one.
+ */
+#define MLX5_ST_SZ_DW_MATCH_PARAM \
+ ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \
+ BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \
+ MLX5_FLD_SZ_BYTES(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED) +\
+ MLX5_BYTE_OFF(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED)))
+
+/* Type of children is mlx5_flow_rule */
+struct fs_fte {
+ struct fs_node node;
+ struct mlx5_fs_dr_rule fs_dr_rule;
+ u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
+ u32 dests_size;
+ u32 fwd_dests;
+ u32 index;
+ struct mlx5_flow_context flow_context;
+ struct mlx5_flow_act action;
+ enum fs_fte_status status;
+ struct mlx5_fc *counter;
+ struct rhash_head hash;
+ int modify_mask;
+};
+
+/* Type of children is mlx5_flow_table/namespace */
+struct fs_prio {
+ struct fs_node node;
+ unsigned int num_levels;
+ unsigned int start_level;
+ unsigned int prio;
+ unsigned int num_ft;
+};
+
+/* Type of children is fs_prio */
+struct mlx5_flow_namespace {
+ /* parent == NULL => root ns */
+ struct fs_node node;
+ enum mlx5_flow_table_miss_action def_miss_action;
+};
+
+struct mlx5_flow_group_mask {
+ u8 match_criteria_enable;
+ u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM];
+};
+
+/* Type of children is fs_fte */
+struct mlx5_flow_group {
+ struct fs_node node;
+ struct mlx5_fs_dr_matcher fs_dr_matcher;
+ struct mlx5_flow_group_mask mask;
+ u32 start_index;
+ u32 max_ftes;
+ struct ida fte_allocator;
+ u32 id;
+ struct rhashtable ftes_hash;
+ struct rhlist_head hash;
+};
+
+struct mlx5_flow_root_namespace {
+ struct mlx5_flow_namespace ns;
+ enum mlx5_flow_steering_mode mode;
+ struct mlx5_fs_dr_domain fs_dr_domain;
+ enum fs_flow_table_type table_type;
+ struct mlx5_core_dev *dev;
+ struct mlx5_flow_table *root_ft;
+ /* Should be held when chaining flow tables */
+ struct mutex chain_lock;
+ struct list_head underlay_qpns;
+ const struct mlx5_flow_cmds *cmds;
+};
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+ struct delayed_work *dwork,
+ unsigned long delay);
+void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
+ unsigned long interval);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
+
+int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns);
+
+int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
+ enum mlx5_flow_steering_mode mode);
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
+void mlx5_fs_core_free(struct mlx5_core_dev *dev);
+int mlx5_fs_core_init(struct mlx5_core_dev *dev);
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
+void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
+int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
+void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type);
+
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
+
+#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
+
+#define fs_list_for_each_entry(pos, root) \
+ list_for_each_entry(pos, root, node.list)
+
+#define fs_list_for_each_entry_safe(pos, tmp, root) \
+ list_for_each_entry_safe(pos, tmp, root, node.list)
+
+#define fs_for_each_ns_or_ft_reverse(pos, prio) \
+ list_for_each_entry_reverse(pos, &(prio)->node.children, list)
+
+#define fs_for_each_ns_or_ft(pos, prio) \
+ list_for_each_entry(pos, (&(prio)->node.children), list)
+
+#define fs_for_each_prio(pos, ns) \
+ fs_list_for_each_entry(pos, &(ns)->node.children)
+
+#define fs_for_each_ns(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
+#define fs_for_each_ft(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
+#define fs_for_each_ft_safe(pos, tmp, prio) \
+ fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children)
+
+#define fs_for_each_fg(pos, ft) \
+ fs_list_for_each_entry(pos, &(ft)->node.children)
+
+#define fs_for_each_fte(pos, fg) \
+ fs_list_for_each_entry(pos, &(fg)->node.children)
+
+#define fs_for_each_dst(pos, fte) \
+ fs_list_for_each_entry(pos, &(fte)->node.children)
+
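+/* Map a flow steering table type to the matching per-type capability getter.
+ * The BUILD_BUG_ON_ZERO() arm breaks the build if a new fs_flow_table_type is
+ * added before FS_FT_MAX_TYPE without extending this ladder.
+ */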
+#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \
+ (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \
+ (type == FS_FT_NIC_TX) ? MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) : \
+ (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \
+ (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \
+ (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \
+ (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \
+ (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \
+ (type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) : \
+ (type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) : \
+ (type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \
+ (BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\
+ )
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
new file mode 100644
index 000000000..b406e0367
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -0,0 +1,762 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+
+#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000)
+/* Max number of counters to query in bulk read is 32K */
+#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+#define MLX5_INIT_COUNTERS_BULK 8
+#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
+#define MLX5_FC_POOL_USED_BUFF_RATIO 10
+
+struct mlx5_fc_cache {
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc {
+ struct list_head list;
+ struct llist_node addlist;
+ struct llist_node dellist;
+
+ /* last{packets,bytes} members are used when calculating the delta since
+ * last reading
+ */
+ u64 lastpackets;
+ u64 lastbytes;
+
+ struct mlx5_fc_bulk *bulk;
+ u32 id;
+ bool aging;
+
+ struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev);
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool);
+static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool);
+static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc);
+
+/* locking scheme:
+ *
+ * It is the responsibility of the user to prevent concurrent calls or bad
+ * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference
+ * to struct mlx5_fc.
+ * e.g. en_tc.c is protected by the RTNL lock of its caller, and will never
+ * call a dump (access to struct mlx5_fc) after a counter is destroyed.
+ *
+ * access to counter list:
+ * - create (user context)
+ * - mlx5_fc_create() only adds to an addlist to be used by
+ * mlx5_fc_stats_work(). addlist is a lockless singly linked list
+ * that doesn't require any additional synchronization when adding a
+ * single node.
+ * - queue the stats work to do the actual insertion
+ *
+ * - destroy (user context)
+ * - add a counter to lockless dellist
+ * - spawn thread to do the actual del
+ *
+ * - dump (user context)
+ * user should not call dump after destroy
+ *
+ * - query (single thread workqueue context)
+ * destroy/dump - no conflict (see destroy)
+ * query/dump - packets and bytes might be inconsistent (since update is not
+ * atomic)
+ * query/create - no conflict (see create)
+ * since every create/destroy queues the work, the thread will actually
+ * query the hardware only after the necessary time has elapsed.
+ */
+
+static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
+ u32 id)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ unsigned long next_id = (unsigned long)id + 1;
+ struct mlx5_fc *counter;
+ unsigned long tmp;
+
+ rcu_read_lock();
+ /* skip counters that are in idr, but not yet in counters list */
+ idr_for_each_entry_continue_ul(&fc_stats->counters_idr,
+ counter, tmp, next_id) {
+ if (!list_empty(&counter->list))
+ break;
+ }
+ rcu_read_unlock();
+
+ return counter ? &counter->list : &fc_stats->counters;
+}
+
+static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id);
+
+ list_add_tail(&counter->list, next);
+}
+
+static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ list_del(&counter->list);
+
+ spin_lock(&fc_stats->counters_idr_lock);
+ WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
+ spin_unlock(&fc_stats->counters_idr_lock);
+}
+
+static int get_init_bulk_query_len(struct mlx5_core_dev *dev)
+{
+ return min_t(int, MLX5_INIT_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
+
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
+{
+ return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
+
+static void update_counter_cache(int index, u32 *bulk_raw_data,
+ struct mlx5_fc_cache *cache)
+{
+ void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data,
+ flow_statistics[index]);
+ u64 packets = MLX5_GET64(traffic_counter, stats, packets);
+ u64 bytes = MLX5_GET64(traffic_counter, stats, octets);
+
+ if (cache->packets == packets)
+ return;
+
+ cache->packets = packets;
+ cache->bytes = bytes;
+ cache->lastuse = jiffies;
+}
+
+static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
+ struct mlx5_fc *first,
+ u32 last_id)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ bool query_more_counters = (first->id <= last_id);
+ int cur_bulk_len = fc_stats->bulk_query_len;
+ u32 *data = fc_stats->bulk_query_out;
+ struct mlx5_fc *counter = first;
+ u32 bulk_base_id;
+ int bulk_len;
+ int err;
+
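+ /* Walk the ID-sorted counters list in chunks of at most bulk_query_len
+ * counters, issuing one firmware bulk query per chunk.
+ */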
+ while (query_more_counters) {
+ /* first id must be aligned to 4 when using bulk query */
+ bulk_base_id = counter->id & ~0x3;
+
+ /* number of counters to query, including the last counter */
+ bulk_len = min_t(int, cur_bulk_len,
+ ALIGN(last_id - bulk_base_id + 1, 4));
+
+ err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
+ data);
+ if (err) {
+ mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+ return;
+ }
+ query_more_counters = false;
+
+ list_for_each_entry_from(counter, &fc_stats->counters, list) {
+ int counter_index = counter->id - bulk_base_id;
+ struct mlx5_fc_cache *cache = &counter->cache;
+
+ if (counter->id >= bulk_base_id + bulk_len) {
+ query_more_counters = true;
+ break;
+ }
+
+ update_counter_cache(counter_index, data, cache);
+ }
+ }
+}
+
+static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+ mlx5_cmd_fc_free(dev, counter->id);
+ kfree(counter);
+}
+
+static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ if (counter->bulk)
+ mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter);
+ else
+ mlx5_fc_free(dev, counter);
+}
+
+static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ int max_bulk_len = get_max_bulk_query_len(dev);
+ unsigned long now = jiffies;
+ u32 *bulk_query_out_tmp;
+ int max_out_len;
+
+ if (fc_stats->bulk_query_alloc_failed &&
+ time_before(now, fc_stats->next_bulk_query_alloc))
+ return;
+
+ max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+ bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL);
+ if (!bulk_query_out_tmp) {
+ mlx5_core_warn_once(dev,
+ "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n",
+ max_bulk_len);
+ fc_stats->bulk_query_alloc_failed = true;
+ fc_stats->next_bulk_query_alloc =
+ now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD;
+ return;
+ }
+
+ kfree(fc_stats->bulk_query_out);
+ fc_stats->bulk_query_out = bulk_query_out_tmp;
+ fc_stats->bulk_query_len = max_bulk_len;
+ if (fc_stats->bulk_query_alloc_failed) {
+ mlx5_core_info(dev,
+ "Flow counters bulk query buffer size increased, bulk_size(%d)\n",
+ max_bulk_len);
+ fc_stats->bulk_query_alloc_failed = false;
+ }
+}
+
+static void mlx5_fc_stats_work(struct work_struct *work)
+{
+ struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
+ priv.fc_stats.work.work);
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ /* Take dellist first to ensure that counters cannot be deleted before
+ * they are inserted.
+ */
+ struct llist_node *dellist = llist_del_all(&fc_stats->dellist);
+ struct llist_node *addlist = llist_del_all(&fc_stats->addlist);
+ struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
+ unsigned long now = jiffies;
+
+ if (addlist || !list_empty(&fc_stats->counters))
+ queue_delayed_work(fc_stats->wq, &fc_stats->work,
+ fc_stats->sampling_interval);
+
+ llist_for_each_entry(counter, addlist, addlist) {
+ mlx5_fc_stats_insert(dev, counter);
+ fc_stats->num_counters++;
+ }
+
+ llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
+ mlx5_fc_stats_remove(dev, counter);
+
+ mlx5_fc_release(dev, counter);
+ fc_stats->num_counters--;
+ }
+
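+ /* Grow the bulk query buffer lazily, once the number of counters
+ * exceeds the initial bulk query length.
+ */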
+ if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) &&
+ fc_stats->num_counters > get_init_bulk_query_len(dev))
+ mlx5_fc_stats_bulk_query_size_increase(dev);
+
+ if (time_before(now, fc_stats->next_query) ||
+ list_empty(&fc_stats->counters))
+ return;
+ last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list);
+
+ counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
+ list);
+ if (counter)
+ mlx5_fc_stats_query_counter_range(dev, counter, last->id);
+
+ fc_stats->next_query = now + fc_stats->sampling_interval;
+}
+
+static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc *counter;
+ int err;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_cmd_fc_alloc(dev, &counter->id);
+ if (err) {
+ kfree(counter);
+ return ERR_PTR(err);
+ }
+
+ return counter;
+}
+
+static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter;
+
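+ /* Prefer a counter from the shared bulk pool when the device supports
+ * bulk allocation and the counter will be aged by the stats work.
+ */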
+ if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) {
+ counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool);
+ if (!IS_ERR(counter))
+ return counter;
+ }
+
+ return mlx5_fc_single_alloc(dev);
+}
+
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ int err;
+
+ if (IS_ERR(counter))
+ return counter;
+
+ INIT_LIST_HEAD(&counter->list);
+ counter->aging = aging;
+
+ if (aging) {
+ u32 id = counter->id;
+
+ counter->cache.lastuse = jiffies;
+ counter->lastbytes = counter->cache.bytes;
+ counter->lastpackets = counter->cache.packets;
+
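+ /* Register the counter in the IDR under its own ID; the stats work
+ * uses the IDR to find the insertion point that keeps the counters
+ * list sorted by ID.
+ */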
+ idr_preload(GFP_KERNEL);
+ spin_lock(&fc_stats->counters_idr_lock);
+
+ err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id,
+ GFP_NOWAIT);
+
+ spin_unlock(&fc_stats->counters_idr_lock);
+ idr_preload_end();
+ if (err)
+ goto err_out_alloc;
+
+ llist_add(&counter->addlist, &fc_stats->addlist);
+ }
+
+ return counter;
+
+err_out_alloc:
+ mlx5_fc_release(dev, counter);
+ return ERR_PTR(err);
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging);
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ if (aging)
+ mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+ return counter;
+}
+EXPORT_SYMBOL(mlx5_fc_create);
+
+u32 mlx5_fc_id(struct mlx5_fc *counter)
+{
+ return counter->id;
+}
+EXPORT_SYMBOL(mlx5_fc_id);
+
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ if (!counter)
+ return;
+
+ if (counter->aging) {
+ llist_add(&counter->dellist, &fc_stats->dellist);
+ mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+ return;
+ }
+
+ mlx5_fc_release(dev, counter);
+}
+EXPORT_SYMBOL(mlx5_fc_destroy);
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ int init_bulk_len;
+ int init_out_len;
+
+ spin_lock_init(&fc_stats->counters_idr_lock);
+ idr_init(&fc_stats->counters_idr);
+ INIT_LIST_HEAD(&fc_stats->counters);
+ init_llist_head(&fc_stats->addlist);
+ init_llist_head(&fc_stats->dellist);
+
+ init_bulk_len = get_init_bulk_query_len(dev);
+ init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len);
+ fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL);
+ if (!fc_stats->bulk_query_out)
+ return -ENOMEM;
+ fc_stats->bulk_query_len = init_bulk_len;
+
+ fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
+ if (!fc_stats->wq)
+ goto err_wq_create;
+
+ fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
+ INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
+
+ mlx5_fc_pool_init(&fc_stats->fc_pool, dev);
+ return 0;
+
+err_wq_create:
+ kfree(fc_stats->bulk_query_out);
+ return -ENOMEM;
+}
+
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct llist_node *tmplist;
+ struct mlx5_fc *counter;
+ struct mlx5_fc *tmp;
+
+ cancel_delayed_work_sync(&dev->priv.fc_stats.work);
+ destroy_workqueue(dev->priv.fc_stats.wq);
+ dev->priv.fc_stats.wq = NULL;
+
+ tmplist = llist_del_all(&fc_stats->addlist);
+ llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
+ mlx5_fc_release(dev, counter);
+
+ list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
+ mlx5_fc_release(dev, counter);
+
+ mlx5_fc_pool_cleanup(&fc_stats->fc_pool);
+ idr_destroy(&fc_stats->counters_idr);
+ kfree(fc_stats->bulk_query_out);
+}
+
+int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
+ u64 *packets, u64 *bytes)
+{
+ return mlx5_cmd_fc_query(dev, counter->id, packets, bytes);
+}
+EXPORT_SYMBOL(mlx5_fc_query);
+
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+{
+ return counter->cache.lastuse;
+}
+
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+ u64 *bytes, u64 *packets, u64 *lastuse)
+{
+ struct mlx5_fc_cache c;
+
+ c = counter->cache;
+
+ *bytes = c.bytes - counter->lastbytes;
+ *packets = c.packets - counter->lastpackets;
+ *lastuse = c.lastuse;
+
+ counter->lastbytes = c.bytes;
+ counter->lastpackets = c.packets;
+}
+
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ queue_delayed_work(fc_stats->wq, dwork, delay);
+}
+
+void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
+ unsigned long interval)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ fc_stats->sampling_interval = min_t(unsigned long, interval,
+ fc_stats->sampling_interval);
+}
+
+/* Flow counter bulks */
+
+struct mlx5_fc_bulk {
+ struct list_head pool_list;
+ u32 base_id;
+ int bulk_len;
+ unsigned long *bitmask;
+ struct mlx5_fc fcs[];
+};
+
+static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
+ u32 id)
+{
+ counter->bulk = bulk;
+ counter->id = id;
+}
+
+static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk)
+{
+ return bitmap_weight(bulk->bitmask, bulk->bulk_len);
+}
+
+static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
+{
+ enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
+ struct mlx5_fc_bulk *bulk;
+ int err = -ENOMEM;
+ int bulk_len;
+ u32 base_id;
+ int i;
+
+ alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc);
+ bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1;
+
+ bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL);
+ if (!bulk)
+ goto err_alloc_bulk;
+
+ bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!bulk->bitmask)
+ goto err_alloc_bitmask;
+
+ err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id);
+ if (err)
+ goto err_mlx5_cmd_bulk_alloc;
+
+ bulk->base_id = base_id;
+ bulk->bulk_len = bulk_len;
+ for (i = 0; i < bulk_len; i++) {
+ mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i);
+ set_bit(i, bulk->bitmask);
+ }
+
+ return bulk;
+
+err_mlx5_cmd_bulk_alloc:
+ kvfree(bulk->bitmask);
+err_alloc_bitmask:
+ kvfree(bulk);
+err_alloc_bulk:
+ return ERR_PTR(err);
+}
+
+static int
+mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
+{
+ if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) {
+ mlx5_core_err(dev, "Freeing bulk before all counters were released\n");
+ return -EBUSY;
+ }
+
+ mlx5_cmd_fc_free(dev, bulk->base_id);
+ kvfree(bulk->bitmask);
+ kvfree(bulk);
+
+ return 0;
+}
+
+static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
+{
+ int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len);
+
+ if (free_fc_index >= bulk->bulk_len)
+ return ERR_PTR(-ENOSPC);
+
+ clear_bit(free_fc_index, bulk->bitmask);
+ return &bulk->fcs[free_fc_index];
+}
+
+static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
+{
+ int fc_index = fc->id - bulk->base_id;
+
+ if (test_bit(fc_index, bulk->bitmask))
+ return -EINVAL;
+
+ set_bit(fc_index, bulk->bitmask);
+ return 0;
+}
+
+/* Flow counters pool API */
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev)
+{
+ fc_pool->dev = dev;
+ mutex_init(&fc_pool->pool_lock);
+ INIT_LIST_HEAD(&fc_pool->fully_used);
+ INIT_LIST_HEAD(&fc_pool->partially_used);
+ INIT_LIST_HEAD(&fc_pool->unused);
+ fc_pool->available_fcs = 0;
+ fc_pool->used_fcs = 0;
+ fc_pool->threshold = 0;
+}
+
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+ struct mlx5_fc_bulk *bulk;
+ struct mlx5_fc_bulk *tmp;
+
+ list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list)
+ mlx5_fc_bulk_destroy(dev, bulk);
+ list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list)
+ mlx5_fc_bulk_destroy(dev, bulk);
+ list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list)
+ mlx5_fc_bulk_destroy(dev, bulk);
+}
+
+static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool)
+{
+ fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD,
+ fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO);
+}
+
+static struct mlx5_fc_bulk *
+mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+ struct mlx5_fc_bulk *new_bulk;
+
+ new_bulk = mlx5_fc_bulk_create(dev);
+ if (!IS_ERR(new_bulk))
+ fc_pool->available_fcs += new_bulk->bulk_len;
+ mlx5_fc_pool_update_threshold(fc_pool);
+ return new_bulk;
+}
+
+static void
+mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+
+ fc_pool->available_fcs -= bulk->bulk_len;
+ mlx5_fc_bulk_destroy(dev, bulk);
+ mlx5_fc_pool_update_threshold(fc_pool);
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_from_list(struct list_head *src_list,
+ struct list_head *next_list,
+ bool move_non_full_bulk)
+{
+ struct mlx5_fc_bulk *bulk;
+ struct mlx5_fc *fc;
+
+ if (list_empty(src_list))
+ return ERR_PTR(-ENODATA);
+
+ bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list);
+ fc = mlx5_fc_bulk_acquire_fc(bulk);
+ if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0)
+ list_move(&bulk->pool_list, next_list);
+ return fc;
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool)
+{
+ struct mlx5_fc_bulk *new_bulk;
+ struct mlx5_fc *fc;
+
+ mutex_lock(&fc_pool->pool_lock);
+
+ fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used,
+ &fc_pool->fully_used, false);
+ if (IS_ERR(fc))
+ fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused,
+ &fc_pool->partially_used,
+ true);
+ if (IS_ERR(fc)) {
+ new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool);
+ if (IS_ERR(new_bulk)) {
+ fc = ERR_CAST(new_bulk);
+ goto out;
+ }
+ fc = mlx5_fc_bulk_acquire_fc(new_bulk);
+ list_add(&new_bulk->pool_list, &fc_pool->partially_used);
+ }
+ fc_pool->available_fcs--;
+ fc_pool->used_fcs++;
+
+out:
+ mutex_unlock(&fc_pool->pool_lock);
+ return fc;
+}
+
+static void
+mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+ struct mlx5_fc_bulk *bulk = fc->bulk;
+ int bulk_free_fcs_amount;
+
+ mutex_lock(&fc_pool->pool_lock);
+
+ if (mlx5_fc_bulk_release_fc(bulk, fc)) {
+ mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n");
+ goto unlock;
+ }
+
+ fc_pool->available_fcs++;
+ fc_pool->used_fcs--;
+
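+ /* A bulk that just got its first free counter moves back to the
+ * partially used list; a fully free bulk is either destroyed or parked
+ * on the unused list, depending on the pool threshold.
+ */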
+ bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk);
+ if (bulk_free_fcs_amount == 1)
+ list_move_tail(&bulk->pool_list, &fc_pool->partially_used);
+ if (bulk_free_fcs_amount == bulk->bulk_len) {
+ list_del(&bulk->pool_list);
+ if (fc_pool->available_fcs > fc_pool->threshold)
+ mlx5_fc_pool_free_bulk(fc_pool, bulk);
+ else
+ list_add(&bulk->pool_list, &fc_pool->unused);
+ }
+
+unlock:
+ mutex_unlock(&fc_pool->pool_lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
new file mode 100644
index 000000000..c14590acc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include "fs_ft_pool.h"
+
+/* Firmware currently supports 4 pools of 4 sizes (FT_POOLS) and a virtual
+ * memory region of 16M (FT_SIZE); this region is duplicated for each flow
+ * table pool. We can allocate up to 16M of each pool, and we keep track of
+ * how much we used via mlx5_ft_pool_get_avail_sz.
+ * Firmware doesn't report any of this for now.
+ * FT_POOLS is expected to be sorted from large to small and match the
+ * firmware pools.
+ */
+#define FT_SIZE (16 * 1024 * 1024)
+static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024,
+ 1 * 1024 * 1024,
+ 64 * 1024,
+ 128,
+ 1 /* size for termination tables */ };
+struct mlx5_ft_pool {
+ int ft_left[ARRAY_SIZE(FT_POOLS)];
+};
+
+int mlx5_ft_pool_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_ft_pool *ft_pool;
+ int i;
+
+ ft_pool = kzalloc(sizeof(*ft_pool), GFP_KERNEL);
+ if (!ft_pool)
+ return -ENOMEM;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--)
+ ft_pool->ft_left[i] = FT_SIZE / FT_POOLS[i];
+
+ dev->priv.ft_pool = ft_pool;
+ return 0;
+}
+
+void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev)
+{
+ kfree(dev->priv.ft_pool);
+}
+
+int
+mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type,
+ int desired_size)
+{
+ u32 max_ft_size = 1 << MLX5_CAP_FLOWTABLE_TYPE(dev, log_max_ft_size, table_type);
+ int i, found_i = -1;
+
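+ /* FT_POOLS is sorted large to small, so iterating backwards goes from
+ * the smallest pool up; with POOL_NEXT_SIZE the loop keeps going and
+ * settles on the largest pool that still has entries left.
+ */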
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+ if (dev->priv.ft_pool->ft_left[i] && FT_POOLS[i] >= desired_size &&
+ FT_POOLS[i] <= max_ft_size) {
+ found_i = i;
+ if (desired_size != POOL_NEXT_SIZE)
+ break;
+ }
+ }
+
+ if (found_i != -1) {
+ --dev->priv.ft_pool->ft_left[found_i];
+ return FT_POOLS[found_i];
+ }
+
+ return 0;
+}
+
+void
+mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz)
+{
+ int i;
+
+ if (!sz)
+ return;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+ if (sz == FT_POOLS[i]) {
+ ++dev->priv.ft_pool->ft_left[i];
+ return;
+ }
+ }
+
+ WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h
new file mode 100644
index 000000000..25f4274b3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_FS_FT_POOL_H__
+#define __MLX5_FS_FT_POOL_H__
+
+#include <linux/mlx5/driver.h>
+#include "fs_core.h"
+
+#define POOL_NEXT_SIZE 0
+
+int mlx5_ft_pool_init(struct mlx5_core_dev *dev);
+void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev);
+
+int
+mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type,
+ int desired_size);
+void
+mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz);
+
+#endif /* __MLX5_FS_FT_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
new file mode 100644
index 000000000..f34e758a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -0,0 +1,851 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_core.h"
+#include "../../mlxfw/mlxfw.h"
+#include "lib/tout.h"
+
+enum {
+ MCQS_IDENTIFIER_BOOT_IMG = 0x1,
+ MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4,
+ MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5,
+ MCQS_IDENTIFIER_CS_TOKEN = 0x6,
+ MCQS_IDENTIFIER_DBG_TOKEN = 0x7,
+ MCQS_IDENTIFIER_GEARBOX = 0xA,
+};
+
+enum {
+ MCQS_UPDATE_STATE_IDLE,
+ MCQS_UPDATE_STATE_IN_PROGRESS,
+ MCQS_UPDATE_STATE_APPLIED,
+ MCQS_UPDATE_STATE_ACTIVE,
+ MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET,
+ MCQS_UPDATE_STATE_FAILED,
+ MCQS_UPDATE_STATE_CANCELED,
+ MCQS_UPDATE_STATE_BUSY,
+};
+
+enum {
+ MCQI_INFO_TYPE_CAPABILITIES = 0x0,
+ MCQI_INFO_TYPE_VERSION = 0x1,
+ MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5,
+};
+
+enum {
+ MCQI_FW_RUNNING_VERSION = 0,
+ MCQI_FW_STORED_VERSION = 1,
+};
+
+int mlx5_query_board_id(struct mlx5_core_dev *dev)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {};
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+ err = mlx5_cmd_exec_inout(dev, query_adapter, in, out);
+ if (err)
+ goto out;
+
+ memcpy(dev->board_id,
+ MLX5_ADDR_OF(query_adapter_out, out,
+ query_adapter_struct.vsd_contd_psid),
+ MLX5_FLD_SZ_BYTES(query_adapter_out,
+ query_adapter_struct.vsd_contd_psid));
+
+out:
+ kfree(out);
+ return err;
+}
+
+int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {};
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+ err = mlx5_cmd_exec_inout(mdev, query_adapter, in, out);
+ if (err)
+ goto out;
+
+ *vendor_id = MLX5_GET(query_adapter_out, out,
+ query_adapter_struct.ieee_vendor_id);
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_query_vendor_id);
+
+static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_pcam_reg(dev, dev->caps.pcam,
+ MLX5_PCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_PCAM_REGS_5000_TO_507F);
+}
+
+static int mlx5_get_mcam_access_reg_group(struct mlx5_core_dev *dev,
+ enum mlx5_mcam_reg_groups group)
+{
+ return mlx5_query_mcam_reg(dev, dev->caps.mcam[group],
+ MLX5_MCAM_FEATURE_ENHANCED_FEATURES, group);
+}
+
+static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_qcam_reg(dev, dev->caps.qcam,
+ MLX5_QCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_QCAM_REGS_FIRST_128);
+}
+
+int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(dev, port_selection_cap)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, hca_cap_2)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, atomic)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, roce)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, nic_flow_table) ||
+ MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vport_group_manager) &&
+ MLX5_ESWITCH_MANAGER(dev)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_ESWITCH_MANAGER(dev)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vector_calc)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, qos)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_QOS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, debug))
+ mlx5_core_get_caps(dev, MLX5_CAP_DEBUG);
+
+ if (MLX5_CAP_GEN(dev, pcam_reg))
+ mlx5_get_pcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, mcam_reg)) {
+ mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128);
+ mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9080_0x90FF);
+ mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F);
+ }
+
+ if (MLX5_CAP_GEN(dev, qcam_reg))
+ mlx5_get_qcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, device_memory)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN_64(dev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, ipsec_offload)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_IPSEC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, shampo)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN_64(dev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_MACSEC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, adv_virtualization)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ADV_VIRTUALIZATION);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
+{
+ u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {};
+ int i;
+
+ MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+
+ if (MLX5_CAP_GEN(dev, sw_owner_id)) {
+ for (i = 0; i < 4; i++)
+ MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i,
+ sw_owner_id[i]);
+ }
+
+ if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) &&
+ dev->priv.sw_vhca_id > 0)
+ MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id);
+
+ return mlx5_cmd_exec_in(dev, init_hca, in);
+}
+
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ return mlx5_cmd_exec_in(dev, teardown_hca, in);
+}
+
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+ int force_state;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev, force_teardown)) {
+ mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE);
+
+ ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ force_state = MLX5_GET(teardown_hca_out, out, state);
+ if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+ mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
+{
+ unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN);
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
+ int state;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev, fast_teardown)) {
+ mlx5_core_dbg(dev, "fast teardown is not supported in the firmware\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ MLX5_SET(teardown_hca_in, in, profile,
+ MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN);
+
+ ret = mlx5_cmd_exec_inout(dev, teardown_hca, in, out);
+ if (ret)
+ return ret;
+
+ state = MLX5_GET(teardown_hca_out, out, state);
+ if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+ mlx5_core_warn(dev, "teardown with fast mode failed\n");
+ return -EIO;
+ }
+
+ mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED);
+
+ /* Loop until the device state turns to disabled */
+ end = jiffies + msecs_to_jiffies(delay_ms);
+ do {
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ break;
+
+ cond_resched();
+ } while (!time_after(jiffies, end));
+
+ if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+ dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+ mlx5_get_nic_state(dev), delay_ms);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+enum mlxsw_reg_mcc_instruction {
+ MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01,
+ MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02,
+ MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03,
+ MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04,
+ MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06,
+ MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08,
+};
+
+static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev,
+ enum mlxsw_reg_mcc_instruction instr,
+ u16 component_index, u32 update_handle,
+ u32 component_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mcc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcc_reg)];
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(mcc_reg, in, instruction, instr);
+ MLX5_SET(mcc_reg, in, component_index, component_index);
+ MLX5_SET(mcc_reg, in, update_handle, update_handle);
+ MLX5_SET(mcc_reg, in, component_size, component_size);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCC, 0, 1);
+}
+
+static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev,
+ u32 *update_handle, u8 *error_code,
+ u8 *control_state)
+{
+ u32 out[MLX5_ST_SZ_DW(mcc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcc_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+ MLX5_SET(mcc_reg, in, update_handle, *update_handle);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCC, 0, 0);
+ if (err)
+ goto out;
+
+ *update_handle = MLX5_GET(mcc_reg, out, update_handle);
+ *error_code = MLX5_GET(mcc_reg, out, error_code);
+ *control_state = MLX5_GET(mcc_reg, out, control_state);
+
+out:
+ return err;
+}
+
+static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev,
+ u32 update_handle,
+ u32 offset, u16 size,
+ u8 *data)
+{
+ int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size;
+ u32 out[MLX5_ST_SZ_DW(mcda_reg)];
+ int i, j, dw_size = size >> 2;
+ __be32 data_element;
+ u32 *in;
+
+ in = kzalloc(in_size, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(mcda_reg, in, update_handle, update_handle);
+ MLX5_SET(mcda_reg, in, offset, offset);
+ MLX5_SET(mcda_reg, in, size, size);
+
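+ /* Copy the data block into the MCDA register payload as big-endian
+ * 32-bit words.
+ */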
+ for (i = 0; i < dw_size; i++) {
+ j = i * 4;
+ data_element = htonl(*(u32 *)&data[j]);
+ memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4);
+ }
+
+ err = mlx5_core_access_reg(dev, in, in_size, out,
+ sizeof(out), MLX5_REG_MCDA, 0, 1);
+ kfree(in);
+ return err;
+}
+
+static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev,
+ u16 component_index, bool read_pending,
+ u8 info_type, u16 data_size, void *mcqi_data)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {};
+ u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {};
+ void *data;
+ int err;
+
+ MLX5_SET(mcqi_reg, in, component_index, component_index);
+ MLX5_SET(mcqi_reg, in, read_pending_component, read_pending);
+ MLX5_SET(mcqi_reg, in, info_type, info_type);
+ MLX5_SET(mcqi_reg, in, data_size, data_size);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ MLX5_ST_SZ_BYTES(mcqi_reg) + data_size,
+ MLX5_REG_MCQI, 0, 0);
+ if (err)
+ return err;
+
+ data = MLX5_ADDR_OF(mcqi_reg, out, data);
+ memcpy(mcqi_data, data, data_size);
+
+ return 0;
+}
+
+static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index,
+ u32 *max_component_size, u8 *log_mcda_word_size,
+ u16 *mcda_max_write_size)
+{
+ u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {};
+ int err;
+
+ err = mlx5_reg_mcqi_query(dev, component_index, 0,
+ MCQI_INFO_TYPE_CAPABILITIES,
+ MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg);
+ if (err)
+ return err;
+
+ *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size);
+ *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size);
+ *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size);
+
+ return 0;
+}
+
+struct mlx5_mlxfw_dev {
+ struct mlxfw_dev mlxfw_dev;
+ struct mlx5_core_dev *mlx5_core_dev;
+};
+
+static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev,
+ u16 component_index, u32 *p_max_size,
+ u8 *p_align_bits, u16 *p_max_write_size)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) {
+ mlx5_core_warn(dev, "caps query isn't supported by running FW\n");
+ return -EOPNOTSUPP;
+ }
+
+ return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size,
+ p_align_bits, p_max_write_size);
+}
+
+static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u8 control_state, error_code;
+ int err;
+
+ *fwhandle = 0;
+ err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state);
+ if (err)
+ return err;
+
+ if (control_state != MLXFW_FSM_STATE_IDLE)
+ return -EBUSY;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE,
+ 0, *fwhandle, 0);
+}
+
+static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index, u32 component_size)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT,
+ component_index, fwhandle, component_size);
+}
+
+static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u8 *data, u16 size, u32 offset)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data);
+}
+
+static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT,
+ component_index, fwhandle, 0);
+}
+
+static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0,
+ fwhandle, 0);
+}
+
+static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ enum mlxfw_fsm_state *fsm_state,
+ enum mlxfw_fsm_state_err *fsm_state_err)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u8 control_state, error_code;
+ int err;
+
+ err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state);
+ if (err)
+ return err;
+
+ *fsm_state = control_state;
+ *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code,
+ MLXFW_FSM_STATE_ERR_MAX);
+ return 0;
+}
+
+static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0);
+}
+
+static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0,
+ fwhandle, 0);
+}
+
+static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u32 out[MLX5_ST_SZ_DW(mirc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mirc_reg)];
+ unsigned long exp_time;
+ int err;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE));
+
+ if (!MLX5_CAP_MCAM_REG2(dev, mirc))
+ return -EOPNOTSUPP;
+
+ memset(in, 0, sizeof(in));
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MIRC, 0, 1);
+ if (err)
+ return err;
+
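+ /* Poll the MIRC status until the reactivation completes or the
+ * timeout expires; a final BUSY status is reported to the caller.
+ */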
+ do {
+ memset(out, 0, sizeof(out));
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MIRC, 0, 0);
+ if (err)
+ return err;
+
+ *status = MLX5_GET(mirc_reg, out, status_code);
+ if (*status != MLXFW_FSM_REACTIVATE_STATUS_BUSY)
+ return 0;
+
+ msleep(20);
+ } while (time_before(jiffies, exp_time));
+
+ return 0;
+}
+
+static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = {
+ .component_query = mlx5_component_query,
+ .fsm_lock = mlx5_fsm_lock,
+ .fsm_component_update = mlx5_fsm_component_update,
+ .fsm_block_download = mlx5_fsm_block_download,
+ .fsm_component_verify = mlx5_fsm_component_verify,
+ .fsm_activate = mlx5_fsm_activate,
+ .fsm_reactivate = mlx5_fsm_reactivate,
+ .fsm_query_state = mlx5_fsm_query_state,
+ .fsm_cancel = mlx5_fsm_cancel,
+ .fsm_release = mlx5_fsm_release
+};
+
+int mlx5_firmware_flash(struct mlx5_core_dev *dev,
+ const struct firmware *firmware,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_mlxfw_dev mlx5_mlxfw_dev = {
+ .mlxfw_dev = {
+ .ops = &mlx5_mlxfw_dev_ops,
+ .psid = dev->board_id,
+ .psid_size = strlen(dev->board_id),
+ .devlink = priv_to_devlink(dev),
+ },
+ .mlx5_core_dev = dev
+ };
+
+ if (!MLX5_CAP_GEN(dev, mcam_reg) ||
+ !MLX5_CAP_MCAM_REG(dev, mcqi) ||
+ !MLX5_CAP_MCAM_REG(dev, mcc) ||
+ !MLX5_CAP_MCAM_REG(dev, mcda)) {
+ pr_info("%s flashing isn't supported by the running FW\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev,
+ firmware, extack);
+}
+
+static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev,
+ u16 component_index, bool read_pending,
+ u32 *mcqi_version_out)
+{
+ return mlx5_reg_mcqi_query(dev, component_index, read_pending,
+ MCQI_INFO_TYPE_VERSION,
+ MLX5_ST_SZ_BYTES(mcqi_version),
+ mcqi_version_out);
+}
+
+static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out,
+ u16 component_index)
+{
+ u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg);
+ u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+ int err;
+
+ memset(out, 0, out_sz);
+
+ MLX5_SET(mcqs_reg, in, component_index, component_index);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ out_sz, MLX5_REG_MCQS, 0, 0);
+ return err;
+}
+
+/* Scan component indices sequentially to find the boot image component index */
+static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+ u16 identifier, component_idx = 0;
+ bool quit;
+ int err;
+
+ do {
+ err = mlx5_reg_mcqs_query(dev, out, component_idx);
+ if (err)
+ return err;
+
+ identifier = MLX5_GET(mcqs_reg, out, identifier);
+ quit = !!MLX5_GET(mcqs_reg, out, last_index_flag);
+ quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG;
+ } while (!quit && ++component_idx);
+
+ if (identifier != MCQS_IDENTIFIER_BOOT_IMG) {
+ mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n",
+ component_idx);
+ return -EOPNOTSUPP;
+ }
+
+ return component_idx;
+}
+
+static int
+mlx5_fw_image_pending(struct mlx5_core_dev *dev,
+ int component_index,
+ bool *pending_version_exists)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqs_reg)];
+ u8 component_update_state;
+ int err;
+
+ err = mlx5_reg_mcqs_query(dev, out, component_index);
+ if (err)
+ return err;
+
+ component_update_state = MLX5_GET(mcqs_reg, out, component_update_state);
+
+ if (component_update_state == MCQS_UPDATE_STATE_IDLE) {
+ *pending_version_exists = false;
+ } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) {
+ *pending_version_exists = true;
+ } else {
+ mlx5_core_warn(dev,
+ "mcqs: can't read pending fw version while fw state is %d\n",
+ component_update_state);
+ return -ENODATA;
+ }
+ return 0;
+}
+
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+ u32 *running_ver, u32 *pending_ver)
+{
+ u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {};
+ bool pending_version_exists;
+ int component_index;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) ||
+ !MLX5_CAP_MCAM_REG(dev, mcqs)) {
+ mlx5_core_warn(dev, "fw query isn't supported by the FW\n");
+ return -EOPNOTSUPP;
+ }
+
+ component_index = mlx5_get_boot_img_component_index(dev);
+ if (component_index < 0)
+ return component_index;
+
+ err = mlx5_reg_mcqi_version_query(dev, component_index,
+ MCQI_FW_RUNNING_VERSION,
+ reg_mcqi_version);
+ if (err)
+ return err;
+
+ *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+ err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists);
+ if (err)
+ return err;
+
+ if (!pending_version_exists) {
+ *pending_ver = 0;
+ return 0;
+ }
+
+ err = mlx5_reg_mcqi_version_query(dev, component_index,
+ MCQI_FW_STORED_VERSION,
+ reg_mcqi_version);
+ if (err)
+ return err;
+
+ *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
new file mode 100644
index 000000000..dec1492da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "fw_reset.h"
+#include "diag/fw_tracer.h"
+#include "lib/tout.h"
+
+enum {
+ MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
+ MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
+ MLX5_FW_RESET_FLAGS_PENDING_COMP,
+ MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
+ MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
+};
+
+struct mlx5_fw_reset {
+ struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
+ struct workqueue_struct *wq;
+ struct work_struct fw_live_patch_work;
+ struct work_struct reset_request_work;
+ struct work_struct reset_reload_work;
+ struct work_struct reset_now_work;
+ struct work_struct reset_abort_work;
+ unsigned long reset_flags;
+ struct timer_list timer;
+ struct completion done;
+ int ret;
+};
+
+void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (enable)
+ clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+ else
+ set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+}
+
+bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ return !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+}
+
+static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
+ u8 reset_type_sel, u8 sync_resp, bool sync_start)
+{
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+
+ MLX5_SET(mfrl_reg, in, reset_level, reset_level);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_resp, sync_resp);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, sync_start);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1);
+}
+
+static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
+ u8 *reset_type, u8 *reset_state)
+{
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 0);
+ if (err)
+ return err;
+
+ if (reset_level)
+ *reset_level = MLX5_GET(mfrl_reg, out, reset_level);
+ if (reset_type)
+ *reset_type = MLX5_GET(mfrl_reg, out, reset_type);
+ if (reset_state)
+ *reset_state = MLX5_GET(mfrl_reg, out, reset_state);
+
+ return 0;
+}
+
+int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
+{
+ return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
+}
+
+static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
+ struct netlink_ext_ack *extack)
+{
+ u8 reset_state;
+
+ if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+ goto out;
+
+ switch (reset_state) {
+ case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
+ case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
+ return -EBUSY;
+ case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
+ return -ETIMEDOUT;
+ case MLX5_MFRL_REG_RESET_STATE_NACK:
+ NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
+ return -EPERM;
+ }
+
+out:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset failed");
+ return -EIO;
+}
+
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ int err;
+
+ set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+
+ MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1);
+ err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MFRL, 0, 1, false);
+ if (!err)
+ return 0;
+
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
+ return mlx5_fw_reset_get_reset_state_err(dev, extack);
+
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
+ return mlx5_cmd_check(dev, err, in, out);
+}
+
+int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
+}
+
+static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ /* If this driver initiated the fw reset, the devlink reload flow
+ * completes the reload; just signal the waiting completion.
+ */
+ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
+ complete(&fw_reset->done);
+ } else {
+ mlx5_unload_one(dev, false);
+ if (mlx5_health_wait_pci_up(dev))
+ mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
+ else
+ mlx5_load_one(dev, true);
+ devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
+ BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+ BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
+ }
+}
+
+static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ del_timer_sync(&fw_reset->timer);
+}
+
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
+ mlx5_stop_sync_reset_poll(dev);
+ if (poll_health)
+ mlx5_start_health_poll(dev);
+ return 0;
+}
+
+static void mlx5_sync_reset_reload_work(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_reload_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ mlx5_sync_reset_clear_reset_requested(dev, false);
+ mlx5_enter_error_state(dev, true);
+ mlx5_fw_reset_complete_reload(dev);
+}
+
+#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
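+/* Poll the fatal health sensors while a sync reset request is pending; once
+ * the firmware actually resets the device, schedule the reset/reload work.
+ */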
+static void poll_sync_reset(struct timer_list *t)
+{
+ struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ u32 fatal_error;
+
+ if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ return;
+
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
+
+ if (fatal_error) {
+ mlx5_core_warn(dev, "Got Device Reset\n");
+ if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ else
+ mlx5_core_err(dev, "Device is being removed, Drop new reset work\n");
+ return;
+ }
+
+ mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL));
+}
+
+static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ timer_setup(&fw_reset->timer, poll_sync_reset, 0);
+ fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL);
+ add_timer(&fw_reset->timer);
+}
+
+static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false);
+}
+
+static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
+}
+
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
+ mlx5_stop_health_poll(dev, true);
+ mlx5_start_sync_reset_poll(dev);
+ return 0;
+}
+
+static void mlx5_fw_live_patch_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ fw_live_patch_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
+
+ if (mlx5_fw_tracer_reload(dev->tracer))
+ mlx5_core_err(dev, "Failed to reload FW tracer\n");
+}
+
+static void mlx5_sync_reset_request_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_request_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+ if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) {
+ err = mlx5_fw_reset_set_reset_sync_nack(dev);
+		mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s\n",
+ err ? "Failed" : "Sent");
+ return;
+ }
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
+ err = mlx5_fw_reset_set_reset_sync_ack(dev);
+ if (err)
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
+ else
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
+}
+
+static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+{
+ struct pci_bus *bridge_bus = dev->pdev->bus;
+ struct pci_dev *bridge = bridge_bus->self;
+ u16 reg16, dev_id, sdev_id;
+ unsigned long timeout;
+ struct pci_dev *sdev;
+ int cap, err;
+ u32 reg32;
+
+	/* Check that all functions under the PCI bridge are PFs of
+	 * this device; otherwise fail this function.
+	 */
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+ if (err)
+ return err;
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id);
+ if (err)
+ return err;
+ if (sdev_id != dev_id)
+ return -EPERM;
+ }
+
+ cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
+ if (!cap)
+ return -EOPNOTSUPP;
+
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ pci_save_state(sdev);
+ pci_cfg_access_lock(sdev);
+ }
+	/* Toggle the PCI link: assert Link Disable on the bridge, wait, then clear it */
+ err = pcie_capability_set_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD);
+ if (err)
+ return err;
+ msleep(500);
+ err = pcie_capability_clear_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD);
+ if (err)
+ return err;
+
+ /* Check link */
+ err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32);
+ if (err)
+ return err;
+ if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) {
+ mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32);
+ msleep(1000);
+ goto restore;
+ }
+
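+	/* Wait for the bridge to report Data Link Layer Link Active again */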
+ timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE));
+ do {
+ err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
+ if (err)
+ return err;
+ if (reg16 & PCI_EXP_LNKSTA_DLLLA)
+ break;
+ msleep(20);
+ } while (!time_after(jiffies, timeout));
+
+ if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
+ mlx5_core_info(dev, "PCI Link up\n");
+ } else {
+ mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n",
+ reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
+ err = -ETIMEDOUT;
+ }
+
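+	/* Wait until config reads return the expected device ID, i.e. the
+	 * firmware is responding to PCI config cycles again.
+	 */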
+ do {
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &reg16);
+ if (err)
+ return err;
+ if (reg16 == dev_id)
+ break;
+ msleep(20);
+ } while (!time_after(jiffies, timeout));
+
+ if (reg16 == dev_id) {
+ mlx5_core_info(dev, "Firmware responds to PCI config cycles again\n");
+ } else {
+ mlx5_core_err(dev, "Firmware is not responsive (0x%04x) after %llu ms\n",
+ reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
+ err = -ETIMEDOUT;
+ }
+
+restore:
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ pci_cfg_access_unlock(sdev);
+ pci_restore_state(sdev);
+ }
+
+ return err;
+}
+
+static void mlx5_sync_reset_now_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_now_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;
+
+ mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
+
+ err = mlx5_cmd_fast_teardown_hca(dev);
+ if (err) {
+ mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err);
+ goto done;
+ }
+
+ err = mlx5_pci_link_toggle(dev);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
+ set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
+ }
+
+ mlx5_enter_error_state(dev, true);
+done:
+ fw_reset->ret = err;
+ mlx5_fw_reset_complete_reload(dev);
+}
+
+static void mlx5_sync_reset_abort_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_abort_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
+ return;
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
+}
+
+static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe)
+{
+ struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe;
+ u8 sync_event_rst_type;
+
+ sync_fw_update_eqe = &eqe->data.sync_fw_update;
+ sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK;
+ switch (sync_event_rst_type) {
+ case MLX5_SYNC_RST_STATE_RESET_REQUEST:
+ queue_work(fw_reset->wq, &fw_reset->reset_request_work);
+ break;
+ case MLX5_SYNC_RST_STATE_RESET_NOW:
+ queue_work(fw_reset->wq, &fw_reset->reset_now_work);
+ break;
+ case MLX5_SYNC_RST_STATE_RESET_ABORT:
+ queue_work(fw_reset->wq, &fw_reset->reset_abort_work);
+ break;
+ }
+}
+
+static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
+ struct mlx5_eqe *eqe = data;
+
+ if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ return NOTIFY_DONE;
+
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
+ queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+ break;
+ case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
+ mlx5_sync_reset_events_handle(fw_reset, eqe);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
+{
+ unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE);
+ unsigned long timeout = msecs_to_jiffies(pci_sync_update_timeout);
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ int err;
+
+ if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
+ mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n",
+ pci_sync_update_timeout / 1000);
+ err = -ETIMEDOUT;
+ goto out;
+ }
+ err = fw_reset->ret;
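+	/* If the PCI link toggle failed, recover with a full unload/load cycle */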
+ if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) {
+ mlx5_unload_one_devl_locked(dev, false);
+ mlx5_load_one_devl_locked(dev, true);
+ }
+out:
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ return err;
+}
+
+void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT);
+ mlx5_eq_notifier_register(dev, &fw_reset->nb);
+}
+
+void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
+{
+ mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
+}
+
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+ cancel_work_sync(&fw_reset->fw_live_patch_work);
+ cancel_work_sync(&fw_reset->reset_request_work);
+ cancel_work_sync(&fw_reset->reset_reload_work);
+ cancel_work_sync(&fw_reset->reset_now_work);
+ cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
+int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
+
+ if (!fw_reset)
+ return -ENOMEM;
+ fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events");
+ if (!fw_reset->wq) {
+ kfree(fw_reset);
+ return -ENOMEM;
+ }
+
+ fw_reset->dev = dev;
+ dev->priv.fw_reset = fw_reset;
+
+ INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event);
+ INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event);
+ INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
+ INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
+ INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
+
+ init_completion(&fw_reset->done);
+ return 0;
+}
+
+void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ destroy_workqueue(fw_reset->wq);
+ kfree(dev->priv.fw_reset);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
new file mode 100644
index 000000000..dc141c7e6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_FW_RESET_H
+#define __MLX5_FW_RESET_H
+
+#include "mlx5_core.h"
+
+void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable);
+bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev);
+int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type);
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack);
+int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
+
+int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
+int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
new file mode 100644
index 000000000..e42e4ac23
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -0,0 +1,941 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <linux/kern_levels.h>
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+#include "lib/pci_vsc.h"
+#include "lib/tout.h"
+#include "diag/fw_tracer.h"
+
+enum {
+ MAX_MISSES = 3,
+};
+
+enum {
+ MLX5_HEALTH_SYNDR_FW_ERR = 0x1,
+ MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7,
+ MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8,
+ MLX5_HEALTH_SYNDR_CRC_ERR = 0x9,
+ MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa,
+ MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb,
+ MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc,
+ MLX5_HEALTH_SYNDR_EQ_ERR = 0xd,
+ MLX5_HEALTH_SYNDR_EQ_INV = 0xe,
+ MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf,
+ MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
+};
+
+enum {
+ MLX5_DROP_NEW_HEALTH_WORK,
+};
+
+enum {
+ MLX5_SENSOR_NO_ERR = 0,
+ MLX5_SENSOR_PCI_COMM_ERR = 1,
+ MLX5_SENSOR_PCI_ERR = 2,
+ MLX5_SENSOR_NIC_DISABLED = 3,
+ MLX5_SENSOR_NIC_SW_RESET = 4,
+ MLX5_SENSOR_FW_SYND_RFR = 5,
+};
+
+enum {
+ MLX5_SEVERITY_MASK = 0x7,
+ MLX5_SEVERITY_VALID_MASK = 0x8,
+};
+
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
+{
+ return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
+}
+
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
+{
+ u32 cur_cmdq_addr_l_sz;
+
+ cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
+ iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
+ state << MLX5_NIC_IFC_OFFSET,
+ &dev->iseg->cmdq_addr_l_sz);
+}
+
+static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+
+ /* Offline PCI reads return 0xffffffff */
+ return (ioread32be(&h->fw_ver) == 0xffffffff);
+}
+
+static int mlx5_health_get_rfr(u8 rfr_severity)
+{
+ return rfr_severity >> MLX5_RFR_BIT_OFFSET;
+}
+
+static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u8 synd = ioread8(&h->synd);
+ u8 rfr;
+
+ rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));
+
+ if (rfr && synd)
+ mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
+ return rfr && synd;
+}
+
+u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
+{
+ if (sensor_pci_not_working(dev))
+ return MLX5_SENSOR_PCI_COMM_ERR;
+ if (pci_channel_offline(dev->pdev))
+ return MLX5_SENSOR_PCI_ERR;
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ return MLX5_SENSOR_NIC_DISABLED;
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
+ return MLX5_SENSOR_NIC_SW_RESET;
+ if (sensor_fw_synd_rfr(dev))
+ return MLX5_SENSOR_FW_SYND_RFR;
+
+ return MLX5_SENSOR_NO_ERR;
+}
+
+static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
+{
+ enum mlx5_vsc_state state;
+ int ret;
+
+ if (!mlx5_core_is_pf(dev))
+ return -EBUSY;
+
+	/* Try to lock GW access. EBUSY is not returned at this stage,
+	 * because a locked GW does not necessarily mean that another PF
+	 * has already started the reset.
+	 */
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret == -EBUSY)
+ return -EINVAL;
+ if (ret)
+ return ret;
+
+ state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
+ /* At this stage, if the return status == EBUSY, then we know
+ * for sure that another PF started the reset, so don't allow
+ * another reset.
+ */
+ ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
+ if (ret)
+ mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+
+ /* Unlock GW access */
+ mlx5_vsc_gw_unlock(dev);
+
+ return ret;
+}
+
+static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
+{
+ bool supported = (ioread32be(&dev->iseg->initializing) >>
+ MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
+ u32 fatal_error;
+
+ if (!supported)
+ return false;
+
+	/* The reset only needs to be issued by one PF. The health buffer is
+	 * shared between all functions, and will be cleared during a reset.
+	 * Check again to avoid a redundant 2nd reset. If the fatal error was
+	 * PCI related, a reset won't help.
+	 */
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
+ if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
+ fatal_error == MLX5_SENSOR_NIC_DISABLED ||
+ fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
+		mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n");
+ return false;
+ }
+
+ mlx5_core_warn(dev, "Issuing FW Reset\n");
+ /* Write the NIC interface field to initiate the reset, the command
+ * interface address also resides here, don't overwrite it.
+ */
+ mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
+
+ return true;
+}
+
+static void enter_error_state(struct mlx5_core_dev *dev, bool force)
+{
+ if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ mlx5_cmd_flush(dev);
+ }
+
+ mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
+}
+
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
+{
+ bool err_detected = false;
+
+ /* Mark the device as fatal in order to abort FW commands */
+ if ((mlx5_health_check_fatal_sensors(dev) || force) &&
+ dev->state == MLX5_DEVICE_STATE_UP) {
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ err_detected = true;
+ }
+ mutex_lock(&dev->intf_state_mutex);
+ if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto unlock;/* a previous error is still being handled */
+
+ enter_error_state(dev, force);
+unlock:
+ mutex_unlock(&dev->intf_state_mutex);
+}
+
+void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
+{
+ unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
+ int lock = -EBUSY;
+
+ mutex_lock(&dev->intf_state_mutex);
+ if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto unlock;
+
+ mlx5_core_err(dev, "start\n");
+
+ if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
+ /* Get cr-dump and reset FW semaphore */
+ lock = lock_sem_sw_reset(dev, true);
+
+ if (lock == -EBUSY) {
+ delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
+ goto recover_from_sw_reset;
+ }
+ /* Execute SW reset */
+ reset_fw_if_needed(dev);
+ }
+
+recover_from_sw_reset:
+ /* Recover from SW reset */
+ end = jiffies + msecs_to_jiffies(delay_ms);
+ do {
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ break;
+
+ msleep(20);
+ } while (!time_after(jiffies, end));
+
+ if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+ dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+ mlx5_get_nic_state(dev), delay_ms);
+ }
+
+ /* Release FW semaphore if you are the lock owner */
+ if (!lock)
+ lock_sem_sw_reset(dev, false);
+
+ mlx5_core_err(dev, "end\n");
+
+unlock:
+ mutex_unlock(&dev->intf_state_mutex);
+}
+
+static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
+{
+ u8 nic_interface = mlx5_get_nic_state(dev);
+
+ switch (nic_interface) {
+ case MLX5_NIC_IFC_FULL:
+		mlx5_core_warn(dev, "Expected to see disabled NIC but it is in full driver mode\n");
+ break;
+
+ case MLX5_NIC_IFC_DISABLED:
+ mlx5_core_warn(dev, "starting teardown\n");
+ break;
+
+ case MLX5_NIC_IFC_NO_DRAM_NIC:
+		mlx5_core_warn(dev, "Expected to see disabled NIC but it is in no-dram-nic mode\n");
+ break;
+
+ case MLX5_NIC_IFC_SW_RESET:
+		/* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
+		 * 1. PCI has been disabled (i.e. PCI-AER, or the PF driver was
+		 *    unloaded and this is a VF); this is not recoverable by SW
+		 *    reset. Logging of this is handled elsewhere.
+		 * 2. FW reset has been issued by another function; the driver
+		 *    can be reloaded to recover after the mode switches to
+		 *    MLX5_NIC_IFC_DISABLED.
+		 */
+ if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
+ mlx5_core_warn(dev, "NIC SW reset in progress\n");
+ break;
+
+ default:
+		mlx5_core_warn(dev, "Expected to see disabled NIC but it has invalid value %d\n",
+ nic_interface);
+ }
+
+ mlx5_disable_device(dev);
+}
+
+int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
+{
+ unsigned long end;
+
+ end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
+ while (sensor_pci_not_working(dev)) {
+ if (time_after(jiffies, end))
+ return -ETIMEDOUT;
+ msleep(100);
+ }
+ return 0;
+}
+
+static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
+{
+ mlx5_core_warn(dev, "handling bad device here\n");
+ mlx5_handle_bad_state(dev);
+ if (mlx5_health_wait_pci_up(dev)) {
+ mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
+ return -EIO;
+ }
+ mlx5_core_err(dev, "starting health recovery flow\n");
+ if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
+ mlx5_core_err(dev, "health recovery failed\n");
+ return -EIO;
+ }
+
+ mlx5_core_info(dev, "health recovery succeeded\n");
+ return 0;
+}
+
+static const char *hsynd_str(u8 synd)
+{
+ switch (synd) {
+ case MLX5_HEALTH_SYNDR_FW_ERR:
+ return "firmware internal error";
+ case MLX5_HEALTH_SYNDR_IRISC_ERR:
+ return "irisc not responding";
+ case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
+ return "unrecoverable hardware error";
+ case MLX5_HEALTH_SYNDR_CRC_ERR:
+ return "firmware CRC error";
+ case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
+ return "ICM fetch PCI error";
+ case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
+		return "HW fatal error";
+ case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
+ return "async EQ buffer overrun";
+ case MLX5_HEALTH_SYNDR_EQ_ERR:
+ return "EQ error";
+ case MLX5_HEALTH_SYNDR_EQ_INV:
+ return "Invalid EQ referenced";
+ case MLX5_HEALTH_SYNDR_FFSER_ERR:
+ return "FFSER error";
+ case MLX5_HEALTH_SYNDR_HIGH_TEMP:
+ return "High temperature";
+ default:
+ return "unrecognized error";
+ }
+}
+
+static const char *mlx5_loglevel_str(int level)
+{
+ switch (level) {
+ case LOGLEVEL_EMERG:
+ return "EMERGENCY";
+ case LOGLEVEL_ALERT:
+ return "ALERT";
+ case LOGLEVEL_CRIT:
+ return "CRITICAL";
+ case LOGLEVEL_ERR:
+ return "ERROR";
+ case LOGLEVEL_WARNING:
+ return "WARNING";
+ case LOGLEVEL_NOTICE:
+ return "NOTICE";
+ case LOGLEVEL_INFO:
+ return "INFO";
+ case LOGLEVEL_DEBUG:
+ return "DEBUG";
+ }
+ return "Unknown log level";
+}
+
+static int mlx5_health_get_severity(u8 rfr_severity)
+{
+ return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
+ rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
+}
+
+static void print_health_info(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u8 rfr_severity;
+ int severity;
+ int i;
+
+	/* If the syndrome is 0, the device is OK and there is no need to print the buffer */
+ if (!ioread8(&h->synd))
+ return;
+
+ if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
+ mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
+ return;
+ }
+
+ rfr_severity = ioread8(&h->rfr_severity);
+ severity = mlx5_health_get_severity(rfr_severity);
+ mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
+ hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));
+
+ for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
+ mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i,
+ ioread32be(h->assert_var + i));
+
+ mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
+ mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+	mlx5_log(dev, severity, "fw_ver %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev),
+ fw_rev_sub(dev));
+ mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time));
+ mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+ mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
+ mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
+ mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index));
+ mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd),
+ hsynd_str(ioread8(&h->synd)));
+ mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+ mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
+}
+
+static int
+mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u8 synd;
+ int err;
+
+ synd = ioread8(&h->synd);
+ err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
+ if (err || !synd)
+ return err;
+ return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
+}
+
+struct mlx5_fw_reporter_ctx {
+ u8 err_synd;
+ int miss_counter;
+};
+
+static int
+mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
+{
+ int err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
+ fw_reporter_ctx->err_synd);
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
+ fw_reporter_ctx->miss_counter);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int
+mlx5_fw_reporter_health_buffer_data_put(struct mlx5_core_dev *dev,
+					struct devlink_fmsg *fmsg)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u8 rfr_severity;
+ int err;
+ int i;
+
+ if (!ioread8(&h->synd))
+ return 0;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
+ if (err)
+ return err;
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
+ err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
+ if (err)
+ return err;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
+ ioread32be(&h->assert_exit_ptr));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
+ ioread32be(&h->assert_callra));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
+ if (err)
+ return err;
+ rfr_severity = ioread8(&h->rfr_severity);
+ err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
+ ioread8(&h->irisc_index));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
+ ioread16be(&h->ext_synd));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
+ ioread32be(&h->fw_ver));
+ if (err)
+ return err;
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+ return devlink_fmsg_pair_nest_end(fmsg);
+}
+
+static int
+mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+ int err;
+
+ err = mlx5_fw_tracer_trigger_core_dump_general(dev);
+ if (err)
+ return err;
+
+ if (priv_ctx) {
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+ err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+ if (err)
+ return err;
+ }
+
+	err = mlx5_fw_reporter_health_buffer_data_put(dev, fmsg);
+ if (err)
+ return err;
+ return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
+}
+
+static void mlx5_fw_reporter_err_work(struct work_struct *work)
+{
+ struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+ struct mlx5_core_health *health;
+
+ health = container_of(work, struct mlx5_core_health, report_work);
+
+ if (IS_ERR_OR_NULL(health->fw_reporter))
+ return;
+
+ fw_reporter_ctx.err_synd = health->synd;
+ fw_reporter_ctx.miss_counter = health->miss_counter;
+ if (fw_reporter_ctx.err_synd) {
+ devlink_health_report(health->fw_reporter,
+ "FW syndrome reported", &fw_reporter_ctx);
+ return;
+ }
+ if (fw_reporter_ctx.miss_counter)
+ devlink_health_report(health->fw_reporter,
+ "FW miss counter reported",
+ &fw_reporter_ctx);
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
+ .name = "fw",
+ .diagnose = mlx5_fw_reporter_diagnose,
+ .dump = mlx5_fw_reporter_dump,
+};
+
+static int
+mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
+ void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+
+ return mlx5_health_try_recover(dev);
+}
+
+static int
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+ u32 crdump_size = dev->priv.health.crdump_size;
+ u32 *cr_data;
+ int err;
+
+ if (!mlx5_core_is_pf(dev))
+ return -EPERM;
+
+ cr_data = kvmalloc(crdump_size, GFP_KERNEL);
+ if (!cr_data)
+ return -ENOMEM;
+ err = mlx5_crdump_collect(dev, cr_data);
+ if (err)
+ goto free_data;
+
+ if (priv_ctx) {
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+ err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+ if (err)
+ goto free_data;
+ }
+
+ err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size);
+
+free_data:
+ kvfree(cr_data);
+ return err;
+}
+
+static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+{
+ struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct devlink *devlink;
+ struct mlx5_priv *priv;
+
+ health = container_of(work, struct mlx5_core_health, fatal_report_work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ devlink = priv_to_devlink(dev);
+
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
+		mlx5_core_err(dev, "health work is not permitted at this stage\n");
+ mutex_unlock(&dev->intf_state_mutex);
+ return;
+ }
+ mutex_unlock(&dev->intf_state_mutex);
+ enter_error_state(dev, false);
+ if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+ devl_lock(devlink);
+ if (mlx5_health_try_recover(dev))
+ mlx5_core_err(dev, "health recovery failed\n");
+ devl_unlock(devlink);
+ return;
+ }
+ fw_reporter_ctx.err_synd = health->synd;
+ fw_reporter_ctx.miss_counter = health->miss_counter;
+ if (devlink_health_report(health->fw_fatal_reporter,
+ "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
+		/* If recovery wasn't performed due to the grace period,
+		 * unload the driver. This ensures that the driver
+		 * releases all its resources and is no longer subjected
+		 * to requests from the kernel.
+		 */
+ mlx5_core_err(dev, "Driver is in error state. Unloading\n");
+ mlx5_unload_one(dev, false);
+ }
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
+ .name = "fw_fatal",
+ .recover = mlx5_fw_fatal_reporter_recover,
+ .dump = mlx5_fw_fatal_reporter_dump,
+};
+
+#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
+#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
+#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
+#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
+
+static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct devlink *devlink = priv_to_devlink(dev);
+ u64 grace_period;
+
+ if (mlx5_core_is_ecpf(dev)) {
+ grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
+ } else if (mlx5_core_is_pf(dev)) {
+ grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
+ } else {
+ /* VF or SF */
+ grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
+ }
+
+ health->fw_reporter =
+ devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
+ 0, dev);
+ if (IS_ERR(health->fw_reporter))
+ mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
+ PTR_ERR(health->fw_reporter));
+
+ health->fw_fatal_reporter =
+ devlink_health_reporter_create(devlink,
+ &mlx5_fw_fatal_reporter_ops,
+ grace_period,
+ dev);
+ if (IS_ERR(health->fw_fatal_reporter))
+ mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
+ PTR_ERR(health->fw_fatal_reporter));
+}
+
+static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (!IS_ERR_OR_NULL(health->fw_reporter))
+ devlink_health_reporter_destroy(health->fw_reporter);
+
+ if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
+ devlink_health_reporter_destroy(health->fw_fatal_reporter);
+}
+
+static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
+{
+ unsigned long next;
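+	/* Add up to one second of random jitter to the poll interval */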
+
+ get_random_bytes(&next, sizeof(next));
+ next %= HZ;
+ next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
+
+ return next;
+}
+
+void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+ queue_work(health->wq, &health->fatal_report_work);
+ else
+		mlx5_core_err(dev, "new health work is not permitted at this stage\n");
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+}
+
+#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
+static void mlx5_health_log_ts_update(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ u64 now_us;
+
+ health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
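+	/* Push the current wall-clock time to firmware (MRTC register) and
+	 * reschedule the update once an hour.
+	 */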
+ now_us = ktime_to_us(ktime_get_real());
+
+ MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
+ MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
+ mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);
+
+ queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
+ msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
+}
+
+static void poll_health(struct timer_list *t)
+{
+ struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u32 fatal_error;
+ u8 prev_synd;
+ u32 count;
+
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
+
+ if (fatal_error && !health->fatal_error) {
+ mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
+ dev->priv.health.fatal_error = fatal_error;
+ print_health_info(dev);
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ mlx5_trigger_health_work(dev);
+ return;
+ }
+
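+	/* Firmware advances health_counter while it is alive; a counter stuck
+	 * for MAX_MISSES consecutive polls marks the health as compromised.
+	 */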
+ count = ioread32be(health->health_counter);
+ if (count == health->prev)
+ ++health->miss_counter;
+ else
+ health->miss_counter = 0;
+
+ health->prev = count;
+ if (health->miss_counter == MAX_MISSES) {
+ mlx5_core_err(dev, "device's health compromised - reached miss count\n");
+ print_health_info(dev);
+ queue_work(health->wq, &health->report_work);
+ }
+
+ prev_synd = health->synd;
+ health->synd = ioread8(&h->synd);
+ if (health->synd && health->synd != prev_synd)
+ queue_work(health->wq, &health->report_work);
+
+out:
+ mod_timer(&health->timer, get_next_poll_jiffies(dev));
+}
+
+void mlx5_start_health_poll(struct mlx5_core_dev *dev)
+{
+ u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ timer_setup(&health->timer, poll_health, 0);
+ health->fatal_error = MLX5_SENSOR_NO_ERR;
+ clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ health->health = &dev->iseg->health;
+ health->health_counter = &dev->iseg->health_counter;
+
+ health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
+ add_timer(&health->timer);
+}
+
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ if (disable_health) {
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ }
+
+ del_timer_sync(&health->timer);
+}
+
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
+ queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
+}
+
+void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ cancel_delayed_work_sync(&health->update_fw_log_ts_work);
+ cancel_work_sync(&health->report_work);
+ cancel_work_sync(&health->fatal_report_work);
+}
+
+void mlx5_health_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ cancel_delayed_work_sync(&health->update_fw_log_ts_work);
+ destroy_workqueue(health->wq);
+ mlx5_fw_reporters_destroy(dev);
+}
+
+int mlx5_health_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health;
+ char *name;
+
+ mlx5_fw_reporters_create(dev);
+
+ health = &dev->priv.health;
+ name = kmalloc(64, GFP_KERNEL);
+ if (!name)
+ goto out_err;
+
+ strcpy(name, "mlx5_health");
+ strcat(name, dev_name(dev->device));
+ health->wq = create_singlethread_workqueue(name);
+ kfree(name);
+ if (!health->wq)
+ goto out_err;
+ spin_lock_init(&health->wq_lock);
+ INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
+ INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
+ INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
+
+ return 0;
+
+out_err:
+ mlx5_fw_reporters_destroy(dev);
+ return -ENOMEM;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
new file mode 100644
index 000000000..e09518f88
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "ipoib.h"
+#include "en/fs_ethtool.h"
+
+static void mlx5i_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_drvinfo(priv, drvinfo);
+ strscpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]",
+ sizeof(drvinfo->driver));
+}
+
+static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_strings(priv, stringset, data);
+}
+
+static int mlx5i_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_get_sset_count(priv, sset);
+}
+
+static void mlx5i_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
+}
+
+static int mlx5i_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+static void mlx5i_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param,
+ struct kernel_ethtool_ringparam *kernel_param,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
+}
+
+static int mlx5i_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(dev);
+ struct mlx5e_priv *epriv = mlx5i_epriv(dev);
+
+	/* The rtnl lock protects against races between this ethtool op and
+	 * sub-interface ndo_init/uninit.
+	 */
+ ASSERT_RTNL();
+ if (ipriv->num_sub_interfaces > 0) {
+ mlx5_core_warn(epriv->mdev,
+ "can't change number of channels for interfaces with sub interfaces (%u)\n",
+ ipriv->num_sub_interfaces);
+ return -EINVAL;
+ }
+
+ return mlx5e_ethtool_set_channels(epriv, ch);
+}
+
+static void mlx5i_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+static int mlx5i_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack);
+}
+
+static int mlx5i_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
+}
+
+static int mlx5i_get_ts_info(struct net_device *netdev,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_get_ts_info(priv, info);
+}
+
+static int mlx5i_flash_device(struct net_device *netdev,
+ struct ethtool_flash *flash)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_flash_device(priv, flash);
+}
+
+static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
+{
+ switch (width) {
+ case MLX5_PTYS_WIDTH_1X: return 1;
+ case MLX5_PTYS_WIDTH_2X: return 2;
+ case MLX5_PTYS_WIDTH_4X: return 4;
+ case MLX5_PTYS_WIDTH_8X: return 8;
+ case MLX5_PTYS_WIDTH_12X: return 12;
+ default: return -1;
+ }
+}
+
+enum mlx5_ptys_rate {
+ MLX5_PTYS_RATE_SDR = 1 << 0,
+ MLX5_PTYS_RATE_DDR = 1 << 1,
+ MLX5_PTYS_RATE_QDR = 1 << 2,
+ MLX5_PTYS_RATE_FDR10 = 1 << 3,
+ MLX5_PTYS_RATE_FDR = 1 << 4,
+ MLX5_PTYS_RATE_EDR = 1 << 5,
+ MLX5_PTYS_RATE_HDR = 1 << 6,
+ MLX5_PTYS_RATE_NDR = 1 << 7,
+};
+
+static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
+{
+ switch (rate) {
+ case MLX5_PTYS_RATE_SDR: return 2500;
+ case MLX5_PTYS_RATE_DDR: return 5000;
+ case MLX5_PTYS_RATE_QDR:
+ case MLX5_PTYS_RATE_FDR10: return 10000;
+ case MLX5_PTYS_RATE_FDR: return 14000;
+ case MLX5_PTYS_RATE_EDR: return 25000;
+ case MLX5_PTYS_RATE_HDR: return 50000;
+ case MLX5_PTYS_RATE_NDR: return 100000;
+ default: return -1;
+ }
+}
+
+static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
+{
+ int rate, width;
+
+ rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper);
+ if (rate < 0)
+ return SPEED_UNKNOWN;
+ width = mlx5_ptys_width_enum_to_int(ib_link_width_oper);
+ if (width < 0)
+ return SPEED_UNKNOWN;
+
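+	/* Link speed in Mb/s is the per-lane rate times the lane width */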
+ return rate * width;
+}
+
+static int mlx5i_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 ib_link_width_oper;
+ u16 ib_proto_oper;
+ int speed, ret;
+
+ ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper,
+ 1);
+ if (ret)
+ return ret;
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper);
+ link_ksettings->base.speed = speed;
+ link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL;
+
+ link_ksettings->base.port = PORT_OTHER;
+
+ link_ksettings->base.autoneg = AUTONEG_DISABLE;
+
+ return 0;
+}
+
+static u32 mlx5i_flow_type_mask(u32 flow_type)
+{
+ return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+}
+
+static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct ethtool_rx_flow_spec *fs = &cmd->fs;
+
+ if (mlx5i_flow_type_mask(fs->flow_type) == ETHER_FLOW)
+ return -EINVAL;
+
+ return mlx5e_ethtool_set_rxnfc(priv, cmd);
+}
+
+static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+ u32 *rule_locs)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+	/* ETHTOOL_GRXRINGS is needed by "ethtool -x", which is not part
+	 * of rxnfc. This logic is kept out of mlx5e_ethtool_get_rxnfc
+	 * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
+	 * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
+	 */
+ if (info->cmd == ETHTOOL_GRXRINGS) {
+ info->data = priv->channels.params.num_channels;
+ return 0;
+ }
+
+ return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs);
+}
+
+const struct ethtool_ops mlx5i_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_strings = mlx5i_get_strings,
+ .get_sset_count = mlx5i_get_sset_count,
+ .get_ethtool_stats = mlx5i_get_ethtool_stats,
+ .get_ringparam = mlx5i_get_ringparam,
+ .set_ringparam = mlx5i_set_ringparam,
+ .flash_device = mlx5i_flash_device,
+ .get_channels = mlx5i_get_channels,
+ .set_channels = mlx5i_set_channels,
+ .get_coalesce = mlx5i_get_coalesce,
+ .set_coalesce = mlx5i_set_coalesce,
+ .get_ts_info = mlx5i_get_ts_info,
+ .get_rxnfc = mlx5i_get_rxnfc,
+ .set_rxnfc = mlx5i_set_rxnfc,
+ .get_link_ksettings = mlx5i_get_link_ksettings,
+ .get_link = ethtool_op_get_link,
+};
+
+const struct ethtool_ops mlx5i_pkey_ethtool_ops = {
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ts_info = mlx5i_get_ts_info,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
new file mode 100644
index 000000000..aed4e8961
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -0,0 +1,842 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/fs.h>
+#include "en.h"
+#include "en/params.h"
+#include "ipoib.h"
+#include "en/fs_ethtool.h"
+
+#define IB_DEFAULT_Q_KEY 0xb1b
+#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
+
+static int mlx5i_open(struct net_device *netdev);
+static int mlx5i_close(struct net_device *netdev);
+static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu);
+
+static const struct net_device_ops mlx5i_netdev_ops = {
+ .ndo_open = mlx5i_open,
+ .ndo_stop = mlx5i_close,
+ .ndo_get_stats64 = mlx5i_get_stats,
+ .ndo_init = mlx5i_dev_init,
+ .ndo_uninit = mlx5i_dev_cleanup,
+ .ndo_change_mtu = mlx5i_change_mtu,
+ .ndo_eth_ioctl = mlx5i_ioctl,
+};
+
+/* IPoIB mlx5 netdev profile */
+static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+
+ /* RQ size in ipoib by default is 512 */
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+
+ /* CQE compression is not supported for IPoIB */
+ params->rx_cqe_compress_def = false;
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ netif_carrier_off(netdev);
+ mlx5e_set_netdev_mtu_boundaries(priv);
+ netdev->mtu = netdev->max_mtu;
+
+ mlx5e_build_nic_params(priv, NULL, netdev->mtu);
+ mlx5i_build_nic_params(mdev, &priv->channels.params);
+
+ mlx5e_timestamp_init(priv);
+
+ /* netdev init */
+ netdev->hw_features |= NETIF_F_SG;
+ netdev->hw_features |= NETIF_F_IP_CSUM;
+ netdev->hw_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_features |= NETIF_F_GRO;
+ netdev->hw_features |= NETIF_F_TSO;
+ netdev->hw_features |= NETIF_F_TSO6;
+ netdev->hw_features |= NETIF_F_RXCSUM;
+ netdev->hw_features |= NETIF_F_RXHASH;
+
+ netdev->netdev_ops = &mlx5i_netdev_ops;
+ netdev->ethtool_ops = &mlx5i_ethtool_ops;
+
+ return 0;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+void mlx5i_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_priv_cleanup(priv);
+}
+
+static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
+{
+ struct rtnl_link_stats64 s = {};
+ int i, j;
+
+ for (i = 0; i < priv->stats_nch; i++) {
+ struct mlx5e_channel_stats *channel_stats;
+ struct mlx5e_rq_stats *rq_stats;
+
+ channel_stats = priv->channel_stats[i];
+ rq_stats = &channel_stats->rq;
+
+ s.rx_packets += rq_stats->packets;
+ s.rx_bytes += rq_stats->bytes;
+
+ for (j = 0; j < priv->max_opened_tc; j++) {
+ struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+ s.tx_packets += sq_stats->packets;
+ s.tx_bytes += sq_stats->bytes;
+ s.tx_dropped += sq_stats->dropped;
+ }
+ }
+
+ memset(&priv->stats.sw, 0, sizeof(s));
+
+ priv->stats.sw.rx_packets = s.rx_packets;
+ priv->stats.sw.rx_bytes = s.rx_bytes;
+ priv->stats.sw.tx_packets = s.tx_packets;
+ priv->stats.sw.tx_bytes = s.tx_bytes;
+ priv->stats.sw.tx_queue_dropped = s.tx_dropped;
+}
+
+void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+ mlx5i_grp_sw_update_stats(priv);
+
+ stats->rx_packets = sstats->rx_packets;
+ stats->rx_bytes = sstats->rx_bytes;
+ stats->tx_packets = sstats->tx_packets;
+ stats->tx_bytes = sstats->tx_bytes;
+ stats->tx_dropped = sstats->tx_queue_dropped;
+}
+
+struct net_device *mlx5i_parent_get(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+ struct net_device *parent_dev;
+ int parent_ifindex;
+
+ ipriv = priv->ppriv;
+
+ parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev);
+ parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex);
+ if (!parent_dev)
+ return NULL;
+
+ parent_ipriv = netdev_priv(parent_dev);
+
+ ASSERT_RTNL();
+ parent_ipriv->num_sub_interfaces++;
+
+ ipriv->parent_dev = parent_dev;
+
+ return parent_dev;
+}
+
+void mlx5i_parent_put(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+
+ ipriv = priv->ppriv;
+ parent_ipriv = netdev_priv(ipriv->parent_dev);
+
+ ASSERT_RTNL();
+ parent_ipriv->num_sub_interfaces--;
+
+ dev_put(ipriv->parent_dev);
+}
+
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ int ret;
+
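+	/* Drive the underlay QP through the RST -> INIT -> RTR -> RTS transitions */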
+ {
+ u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+ u32 *qpc;
+
+ qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index,
+ ipriv->pkey_index);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
+ MLX5_SET(qpc, qpc, q_key, IB_DEFAULT_Q_KEY);
+
+ MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+ MLX5_SET(rst2init_qp_in, in, qpn, ipriv->qpn);
+ ret = mlx5_cmd_exec_in(mdev, rst2init_qp, in);
+ if (ret)
+ goto err_qp_modify_to_err;
+ }
+ {
+ u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+
+ MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+ MLX5_SET(init2rtr_qp_in, in, qpn, ipriv->qpn);
+ ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
+ if (ret)
+ goto err_qp_modify_to_err;
+ }
+ {
+ u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+
+ MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+ MLX5_SET(rtr2rts_qp_in, in, qpn, ipriv->qpn);
+ ret = mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
+ if (ret)
+ goto err_qp_modify_to_err;
+ }
+ return 0;
+
+err_qp_modify_to_err:
+ {
+ u32 in[MLX5_ST_SZ_DW(qp_2err_in)] = {};
+
+ MLX5_SET(qp_2err_in, in, opcode, MLX5_CMD_OP_2ERR_QP);
+ MLX5_SET(qp_2err_in, in, qpn, ipriv->qpn);
+ mlx5_cmd_exec_in(mdev, qp_2err, in);
+ }
+ return ret;
+}
+
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
+
+ MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
+ MLX5_SET(qp_2rst_in, in, qpn, ipriv->qpn);
+ mlx5_cmd_exec_in(mdev, qp_2rst, in);
+}
+
+#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
+
+int mlx5i_create_underlay_qp(struct mlx5e_priv *priv)
+{
+ const unsigned char *dev_addr = priv->netdev->dev_addr;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {};
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ void *addr_path;
+ int qpn = 0;
+ int ret = 0;
+ void *qpc;
+
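+	/* If supported, request a specific QPN derived from bytes 1-3 of the
+	 * hardware address (the IPoIB hardware address embeds the QPN).
+	 */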
+ if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) {
+ qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3];
+ MLX5_SET(create_qp_in, in, input_qpn, qpn);
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev));
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, ulp_stateless_offload_mode,
+ MLX5_QP_ENHANCED_ULP_STATELESS_MODE);
+
+ addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+ MLX5_SET(ads, addr_path, vhca_port_num, 1);
+ MLX5_SET(ads, addr_path, grh, 1);
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+ ret = mlx5_cmd_exec_inout(priv->mdev, create_qp, in, out);
+ if (ret)
+ return ret;
+
+ ipriv->qpn = MLX5_GET(create_qp_out, out, qpn);
+
+ return 0;
+}
+
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qpn);
+ mlx5_cmd_exec_in(mdev, destroy_qp, in);
+}
+
+int mlx5i_update_nic_rx(struct mlx5e_priv *priv)
+{
+ return mlx5e_refresh_tirs(priv, true, true);
+}
+
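+/* The only IPoIB-specific part of the TIS context is the underlay_qpn field,
+ * which ties traffic sent through this TIS to the IPoIB underlay QP.
+ */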
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
+
+ return mlx5e_create_tis(mdev, in, tisn);
+}
+
+static int mlx5i_init_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5i_create_underlay_qp(priv);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5i_create_tis(priv->mdev, ipriv->qpn, &priv->tisn[0][0]);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
+ goto err_destroy_underlay_qp;
+ }
+
+ return 0;
+
+err_destroy_underlay_qp:
+ mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
+ return err;
+}
+
+static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]);
+ mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
+}
+
+static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_namespace *ns =
+ mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
+ int err;
+
+ if (!ns)
+ return -EINVAL;
+
+ mlx5e_fs_set_ns(priv->fs, ns, false);
+ err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
+ err);
+ priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ }
+
+ err = mlx5e_create_ttc_table(priv->fs, priv->rx_res);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
+ err);
+ goto err_destroy_arfs_tables;
+ }
+
+ mlx5e_ethtool_init_steering(priv->fs);
+
+ return 0;
+
+err_destroy_arfs_tables:
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
+
+ return err;
+}
+
+static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_ttc_table(priv->fs);
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
+ mlx5e_ethtool_cleanup_steering(priv->fs);
+}
+
+static int mlx5i_init_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ priv->fs = mlx5e_fs_init(priv->profile, mdev,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
+ if (!priv->fs) {
+ netdev_err(priv->netdev, "FS allocation failed\n");
+ return -ENOMEM;
+ }
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+ if (!priv->rx_res) {
+ err = -ENOMEM;
+ goto err_free_fs;
+ }
+
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
+ if (err)
+ goto err_close_drop_rq;
+
+ err = mlx5i_create_flow_steering(priv);
+ if (err)
+ goto err_destroy_rx_res;
+
+ return 0;
+
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+err_free_fs:
+ mlx5e_fs_cleanup(priv->fs);
+ return err;
+}
+
+static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
+{
+ mlx5i_destroy_flow_steering(priv);
+ mlx5e_rx_res_destroy(priv->rx_res);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
+ mlx5e_rx_res_free(priv->rx_res);
+ priv->rx_res = NULL;
+ mlx5e_fs_cleanup(priv->fs);
+}
+
+/* The stats groups order is the reverse of the update_stats() call order */
+static mlx5e_stats_grp_t mlx5i_stats_grps[] = {
+ &MLX5E_STATS_GRP(sw),
+ &MLX5E_STATS_GRP(qcnt),
+ &MLX5E_STATS_GRP(vnic_env),
+ &MLX5E_STATS_GRP(vport),
+ &MLX5E_STATS_GRP(802_3),
+ &MLX5E_STATS_GRP(2863),
+ &MLX5E_STATS_GRP(2819),
+ &MLX5E_STATS_GRP(phy),
+ &MLX5E_STATS_GRP(pcie),
+ &MLX5E_STATS_GRP(per_prio),
+ &MLX5E_STATS_GRP(pme),
+ &MLX5E_STATS_GRP(channels),
+ &MLX5E_STATS_GRP(per_port_buff_congest),
+};
+
+static unsigned int mlx5i_stats_grps_num(struct mlx5e_priv *priv)
+{
+ return ARRAY_SIZE(mlx5i_stats_grps);
+}
+
+static const struct mlx5e_profile mlx5i_nic_profile = {
+ .init = mlx5i_init,
+ .cleanup = mlx5i_cleanup,
+ .init_tx = mlx5i_init_tx,
+ .cleanup_tx = mlx5i_cleanup_tx,
+ .init_rx = mlx5i_init_rx,
+ .cleanup_rx = mlx5i_cleanup_rx,
+ .enable = NULL, /* mlx5i_enable */
+ .disable = NULL, /* mlx5i_disable */
+ .update_rx = mlx5i_update_nic_rx,
+ .update_stats = NULL, /* mlx5i_update_stats */
+ .update_carrier = NULL, /* no HW update in IB link */
+ .rx_handlers = &mlx5i_rx_handlers,
+ .max_tc = MLX5I_MAX_NUM_TC,
+ .stats_grps = mlx5i_stats_grps,
+ .stats_grps_num = mlx5i_stats_grps_num,
+};
+
+/* mlx5i netdev NDOs */
+
+static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5e_params new_params;
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ new_params = priv->channels.params;
+ new_params.sw_mtu = new_mtu;
+
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+ if (err)
+ goto out;
+
+ netdev->mtu = new_params.sw_mtu;
+
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+int mlx5i_dev_init(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ u8 addr_mod[3];
+
+ /* Set dev address using underlay QP */
+ addr_mod[0] = (ipriv->qpn >> 16) & 0xff;
+ addr_mod[1] = (ipriv->qpn >> 8) & 0xff;
+ addr_mod[2] = (ipriv->qpn) & 0xff;
+ dev_addr_mod(dev, 1, addr_mod, sizeof(addr_mod));
+
+ /* Add QPN to net-device mapping to HT */
+ mlx5i_pkey_add_qpn(dev, ipriv->qpn);
+
+ return 0;
+}
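+
+/* The assignment above is reversible: bytes 1..3 of the hardware address
+ * carry the underlay QPN. A minimal sketch of the inverse (a hypothetical
+ * helper, not part of the driver):
+ *
+ * static u32 mlx5i_qpn_from_dev_addr(const struct net_device *dev)
+ * {
+ * const unsigned char *addr = dev->dev_addr;
+ *
+ * return (addr[1] << 16) | (addr[2] << 8) | addr[3];
+ * }
+ */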
+
+int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlx5e_hwstamp_set(priv, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx5e_hwstamp_get(priv, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+void mlx5i_dev_cleanup(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_uninit_underlay_qp(priv);
+
+ /* Delete QPN to net-device mapping from HT */
+ mlx5i_pkey_del_qpn(dev, ipriv->qpn);
+}
+
+static int mlx5i_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ int err;
+
+ mutex_lock(&epriv->state_lock);
+
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err);
+ goto err_clear_state_opened_flag;
+ }
+
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
+ goto err_reset_qp;
+ }
+
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err)
+ goto err_remove_fs_underlay_qp;
+
+ epriv->profile->update_rx(epriv);
+ mlx5e_activate_priv_channels(epriv);
+
+ mutex_unlock(&epriv->state_lock);
+ return 0;
+
+err_remove_fs_underlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
+err_reset_qp:
+ mlx5i_uninit_underlay_qp(epriv);
+err_clear_state_opened_flag:
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
+ return err;
+}
+
+static int mlx5i_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+
+ /* May already be CLOSED if a previous configuration operation
+ * (e.g. an RX/TX queue size change) that involves close & open failed.
+ */
+ mutex_lock(&epriv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &epriv->state))
+ goto unlock;
+
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ netif_carrier_off(epriv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
+ mlx5e_deactivate_priv_channels(epriv);
+ mlx5e_close_channels(&epriv->channels);
+ mlx5i_uninit_underlay_qp(epriv);
+unlock:
+ mutex_unlock(&epriv->state_lock);
+ return 0;
+}
+
+/* IPoIB RDMA netdev callbacks */
+static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
+ union ib_gid *gid, u16 lid, int set_qkey,
+ u32 qkey)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ int err;
+
+ mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qpn,
+ gid->raw);
+ err = mlx5_core_attach_mcg(mdev, gid, ipriv->qpn);
+ if (err)
+ mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
+ ipriv->qpn, gid->raw);
+
+ if (set_qkey) {
+ mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
+ netdev->name, qkey);
+ ipriv->qkey = qkey;
+ }
+
+ return err;
+}
+
+static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
+ union ib_gid *gid, u16 lid)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ int err;
+
+ mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qpn,
+ gid->raw);
+
+ err = mlx5_core_detach_mcg(mdev, gid, ipriv->qpn);
+ if (err)
+ mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
+ ipriv->qpn, gid->raw);
+
+ return err;
+}
+
+static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(dev);
+ struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)];
+ struct mlx5_ib_ah *mah = to_mah(address);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+
+ mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more());
+
+ return NETDEV_TX_OK;
+}
+
+static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ ipriv->pkey_index = (u16)id;
+}
+
+static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+ mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ const struct mlx5e_profile *profile = priv->profile;
+
+ mlx5e_detach_netdev(priv);
+ profile->cleanup(priv);
+
+ if (!ipriv->sub_interface) {
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+ mlx5e_destroy_mdev_resources(mdev);
+ }
+}
+
+static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev)
+{
+ return mdev->mlx5e_res.hw_objs.pdn != 0;
+}
+
+static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
+{
+ if (mlx5_is_sub_interface(mdev))
+ return mlx5i_pkey_get_profile();
+ return &mlx5i_nic_profile;
+}
+
+static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num,
+ struct net_device *netdev, void *param)
+{
+ struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param;
+ const struct mlx5e_profile *prof = mlx5_get_profile(mdev);
+ struct mlx5i_priv *ipriv;
+ struct mlx5e_priv *epriv;
+ struct rdma_netdev *rn;
+ int err;
+
+ ipriv = netdev_priv(netdev);
+ epriv = mlx5i_epriv(netdev);
+
+ ipriv->sub_interface = mlx5_is_sub_interface(mdev);
+ if (!ipriv->sub_interface) {
+ err = mlx5i_pkey_qpn_ht_init(netdev);
+ if (err) {
+ mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
+ return err;
+ }
+
+ /* This should only be called once per mdev */
+ err = mlx5e_create_mdev_resources(mdev);
+ if (err)
+ goto destroy_ht;
+ }
+
+ err = mlx5e_priv_init(epriv, prof, netdev, mdev);
+ if (err)
+ goto destroy_mdev_resources;
+
+ epriv->profile = prof;
+ epriv->ppriv = ipriv;
+
+ prof->init(mdev, netdev);
+
+ err = mlx5e_attach_netdev(epriv);
+ if (err)
+ goto detach;
+ netif_carrier_off(netdev);
+
+ /* set rdma_netdev func pointers */
+ rn = &ipriv->rn;
+ rn->hca = ibdev;
+ rn->send = mlx5i_xmit;
+ rn->attach_mcast = mlx5i_attach_mcast;
+ rn->detach_mcast = mlx5i_detach_mcast;
+ rn->set_id = mlx5i_set_pkey_index;
+
+ netdev->priv_destructor = mlx5_rdma_netdev_free;
+ netdev->needs_free_netdev = 1;
+
+ return 0;
+
+detach:
+ prof->cleanup(epriv);
+ if (ipriv->sub_interface)
+ return err;
+destroy_mdev_resources:
+ mlx5e_destroy_mdev_resources(mdev);
+destroy_ht:
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+ return err;
+}
+
+int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
+ struct ib_device *device,
+ struct rdma_netdev_alloc_params *params)
+{
+ int nch;
+ int rc;
+
+ rc = mlx5i_check_required_hca_cap(mdev);
+ if (rc)
+ return rc;
+
+ nch = mlx5e_get_max_num_channels(mdev);
+
+ *params = (struct rdma_netdev_alloc_params){
+ .sizeof_priv = sizeof(struct mlx5i_priv) +
+ sizeof(struct mlx5e_priv),
+ .txqs = nch * MLX5E_MAX_NUM_TC,
+ .rxqs = nch,
+ .param = mdev,
+ .initialize_rdma_netdev = mlx5_rdma_setup_rn,
+ };
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_rdma_rn_get_params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
new file mode 100644
index 000000000..f3f2af972
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_IPOB_H__
+#define __MLX5E_IPOB_H__
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+
+#define MLX5I_MAX_NUM_TC 1
+
+extern const struct ethtool_ops mlx5i_ethtool_ops;
+extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
+extern const struct mlx5e_rx_handlers mlx5i_rx_handlers;
+
+#define MLX5_IB_GRH_BYTES 40
+#define MLX5_IPOIB_ENCAP_LEN 4
+#define MLX5_IPOIB_PSEUDO_LEN 20
+#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN)
+
+/* ipoib rdma netdev's private data structure */
+struct mlx5i_priv {
+ struct rdma_netdev rn; /* keep this first */
+ u32 qpn;
+ bool sub_interface;
+ u32 num_sub_interfaces;
+ u32 qkey;
+ u16 pkey_index;
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
+ struct net_device *parent_dev;
+ char *mlx5e_priv[];
+};
+
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn);
+
+/* Underlay QP create/destroy functions */
+int mlx5i_create_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn);
+
+/* Underlay QP state modification init/uninit functions */
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv);
+
+/* Allocate/Free underlay QPN to net-device hash table */
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev);
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev);
+
+/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn);
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn);
+
+/* Get the net-device corresponding to the given underlay QPN */
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn);
+
+/* Shared ndo functions */
+int mlx5i_dev_init(struct net_device *dev);
+void mlx5i_dev_cleanup(struct net_device *dev);
+int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+
+/* Parent profile functions */
+int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev);
+void mlx5i_cleanup(struct mlx5e_priv *priv);
+
+int mlx5i_update_nic_rx(struct mlx5e_priv *priv);
+
+/* Get child interface nic profile */
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
+
+/* Extract mlx5e_priv from IPoIB netdev */
+#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv))
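+
+/* Layout note: netdev_priv() of the IPoIB netdev is struct mlx5i_priv; the
+ * flexible mlx5e_priv[] array at its tail holds the struct mlx5e_priv itself,
+ * which is why rdma_netdev_alloc_params.sizeof_priv is computed as
+ * sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv) in
+ * mlx5_rdma_rn_get_params().
+ */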
+
+struct mlx5_wqe_eth_pad {
+ u8 rsvd0[16];
+};
+
+struct mlx5i_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_datagram_seg datagram;
+ struct mlx5_wqe_eth_pad pad;
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[];
+};
+
+#define MLX5I_SQ_FETCH_WQE(sq, pi) \
+ ((struct mlx5i_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5i_tx_wqe)))
+
+void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more);
+void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
+
+/* Reference management for child to parent interfaces. */
+struct net_device *mlx5i_parent_get(struct net_device *netdev);
+void mlx5i_parent_put(struct net_device *netdev);
+
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+#endif /* __MLX5E_IPOB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
new file mode 100644
index 000000000..0cf4eaf85
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include "ipoib.h"
+
+#define MLX5I_MAX_LOG_PKEY_SUP 7
+
+struct qpn_to_netdev {
+ struct net_device *netdev;
+ struct hlist_node hlist;
+ u32 underlay_qpn;
+};
+
+struct mlx5i_pkey_qpn_ht {
+ struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP];
+ spinlock_t ht_lock; /* Synchronise with NAPI */
+};
+
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
+
+ qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL);
+ if (!qpn_htbl)
+ return -ENOMEM;
+
+ ipriv->qpn_htbl = qpn_htbl;
+ spin_lock_init(&qpn_htbl->ht_lock);
+
+ return 0;
+}
+
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ kfree(ipriv->qpn_htbl);
+}
+
+static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets,
+ u32 qpn)
+{
+ struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)];
+ struct qpn_to_netdev *node;
+
+ hlist_for_each_entry(node, h, hlist) {
+ if (node->underlay_qpn == qpn)
+ return node;
+ }
+
+ return NULL;
+}
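+
+/* Example: with MLX5I_MAX_LOG_PKEY_SUP == 7 the table has 128 buckets and
+ * hash_32(qpn, 7) selects one of them; QPNs that hash to the same value
+ * share a bucket and are told apart by underlay_qpn above.
+ */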
+
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP);
+ struct qpn_to_netdev *new_node;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
+
+ new_node->netdev = netdev;
+ new_node->underlay_qpn = qpn;
+ spin_lock_bh(&ht->ht_lock);
+ hlist_add_head(&new_node->hlist, &ht->buckets[key]);
+ spin_unlock_bh(&ht->ht_lock);
+
+ return 0;
+}
+
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn);
+ if (!node) {
+ mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n");
+ return -EINVAL;
+ }
+
+ spin_lock_bh(&ht->ht_lock);
+ hlist_del_init(&node->hlist);
+ spin_unlock_bh(&ht->ht_lock);
+ kfree(node);
+
+ return 0;
+}
+
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn);
+ if (!node)
+ return NULL;
+
+ return node->netdev;
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev);
+static int mlx5i_pkey_close(struct net_device *netdev);
+static int mlx5i_pkey_dev_init(struct net_device *dev);
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev);
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu);
+static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+
+static const struct net_device_ops mlx5i_pkey_netdev_ops = {
+ .ndo_open = mlx5i_pkey_open,
+ .ndo_stop = mlx5i_pkey_close,
+ .ndo_init = mlx5i_pkey_dev_init,
+ .ndo_get_stats64 = mlx5i_get_stats,
+ .ndo_uninit = mlx5i_pkey_dev_cleanup,
+ .ndo_change_mtu = mlx5i_pkey_change_mtu,
+ .ndo_eth_ioctl = mlx5i_pkey_ioctl,
+};
+
+/* Child NDOs */
+static int mlx5i_pkey_dev_init(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+ struct net_device *parent_dev;
+
+ ipriv = priv->ppriv;
+
+ /* Link to parent */
+ parent_dev = mlx5i_parent_get(dev);
+ if (!parent_dev) {
+ mlx5_core_warn(priv->mdev, "failed to get parent device\n");
+ return -EINVAL;
+ }
+
+ if (dev->num_rx_queues < parent_dev->real_num_rx_queues) {
+ mlx5_core_warn(priv->mdev,
+ "failed to create child device with rx queues [%d] less than parent's [%d]\n",
+ dev->num_rx_queues,
+ parent_dev->real_num_rx_queues);
+ mlx5i_parent_put(dev);
+ return -EINVAL;
+ }
+
+ /* Get QPN to netdevice hash table from parent */
+ parent_ipriv = netdev_priv(parent_dev);
+ ipriv->qpn_htbl = parent_ipriv->qpn_htbl;
+
+ return mlx5i_dev_init(dev);
+}
+
+static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ return mlx5i_ioctl(dev, ifr, cmd);
+}
+
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev)
+{
+ mlx5i_parent_put(netdev);
+ return mlx5i_dev_cleanup(netdev);
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ int err;
+
+ mutex_lock(&epriv->state_lock);
+
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err);
+ goto err_release_lock;
+ }
+
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err);
+ goto err_uninit_underlay_qp;
+ }
+
+ err = mlx5i_create_tis(mdev, ipriv->qpn, &epriv->tisn[0][0]);
+ if (err) {
+ mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
+ goto err_remove_rx_underlay_qp;
+ }
+
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err) {
+ mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
+ goto err_clear_state_opened_flag;
+ }
+ epriv->profile->update_rx(epriv);
+ mlx5e_activate_priv_channels(epriv);
+ mutex_unlock(&epriv->state_lock);
+
+ return 0;
+
+err_clear_state_opened_flag:
+ mlx5e_destroy_tis(mdev, epriv->tisn[0][0]);
+err_remove_rx_underlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
+err_uninit_underlay_qp:
+ mlx5i_uninit_underlay_qp(epriv);
+err_release_lock:
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
+ return err;
+}
+
+static int mlx5i_pkey_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ netif_carrier_off(priv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
+ mlx5i_uninit_underlay_qp(priv);
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+ mlx5e_destroy_tis(mdev, priv->tisn[0][0]);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return 0;
+}
+
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ mutex_lock(&priv->state_lock);
+ netdev->mtu = new_mtu;
+ mutex_unlock(&priv->state_lock);
+
+ return 0;
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+static int mlx5i_pkey_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ int err;
+
+ err = mlx5i_init(mdev, netdev);
+ if (err)
+ return err;
+
+ /* Override parent ndo */
+ netdev->netdev_ops = &mlx5i_pkey_netdev_ops;
+
+ /* Set child limited ethtool support */
+ netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops;
+
+ /* Use dummy rqs */
+ priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+
+ return 0;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5i_cleanup(priv);
+}
+
+static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5i_create_underlay_qp(priv);
+ if (err)
+ mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err);
+
+ return err;
+}
+
+static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
+}
+
+static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource allocation and init
+ */
+ return 0;
+}
+
+static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource free and de-init
+ */
+}
+
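+/* Unlike the parent mlx5i_nic_profile, the pkey (child) profile creates no RX
+ * resources of its own (init_rx/cleanup_rx are no-ops since the parent's are
+ * shared), init_tx only creates the child underlay QP, and TIS creation is
+ * deferred to mlx5i_pkey_open().
+ */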
+static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
+ .init = mlx5i_pkey_init,
+ .cleanup = mlx5i_pkey_cleanup,
+ .init_tx = mlx5i_pkey_init_tx,
+ .cleanup_tx = mlx5i_pkey_cleanup_tx,
+ .init_rx = mlx5i_pkey_init_rx,
+ .cleanup_rx = mlx5i_pkey_cleanup_rx,
+ .enable = NULL,
+ .disable = NULL,
+ .update_rx = mlx5i_update_nic_rx,
+ .update_stats = NULL,
+ .rx_handlers = &mlx5i_rx_handlers,
+ .max_tc = MLX5I_MAX_NUM_TC,
+};
+
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
+{
+ return &mlx5i_pkey_nic_profile;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
new file mode 100644
index 000000000..380a208ab
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "pci_irq.h"
+
+static void cpu_put(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]--;
+}
+
+static void cpu_get(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]++;
+}
+
+/* Gets the least loaded CPU, i.e. the CPU with the fewest IRQs bound to it */
+static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask)
+{
+ int best_cpu = -1;
+ int cpu;
+
+ for_each_cpu_and(cpu, req_mask, cpu_online_mask) {
+ /* CPU has zero IRQs on it. No need to search any more CPUs. */
+ if (!pool->irqs_per_cpu[cpu]) {
+ best_cpu = cpu;
+ break;
+ }
+ if (best_cpu < 0)
+ best_cpu = cpu;
+ if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu])
+ best_cpu = cpu;
+ }
+ if (best_cpu == -1) {
+ /* There are no online CPUs in req_mask */
+ mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n",
+ cpumask_pr_args(req_mask));
+ best_cpu = cpumask_first(cpu_online_mask);
+ }
+ pool->irqs_per_cpu[best_cpu]++;
+ return best_cpu;
+}
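+
+/* Example: with irqs_per_cpu = {2, 0, 1} and req_mask spanning CPUs 0-2,
+ * CPU 1 is returned immediately (zero IRQs on it); otherwise the CPU with the
+ * smallest count wins, and its counter is bumped before returning.
+ */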
+
+/* Creating an IRQ from irq_pool */
+static struct mlx5_irq *
+irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ cpumask_var_t auto_mask;
+ struct mlx5_irq *irq;
+ u32 irq_index;
+ int err;
+
+ if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
+ if (err) {
+ /* don't leak the auto mask allocated above */
+ free_cpumask_var(auto_mask);
+ return ERR_PTR(err);
+ }
+ if (pool->irqs_per_cpu) {
+ if (cpumask_weight(req_mask) > 1)
+ /* if req_mask contains more than one CPU, bind to the least loaded
+ * CPU of req_mask
+ */
+ cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+ else
+ cpu_get(pool, cpumask_first(req_mask));
+ }
+ irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
+ free_cpumask_var(auto_mask);
+ return irq;
+}
+
+/* Looking for the IRQ with the smallest refcount that fits req_mask.
+ * If pool is sf_comp_pool, then we are looking for an IRQ with any of the
+ * requested CPUs in req_mask.
+ * For example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask
+ * isn't a subset of req_mask, so we skip it; irq1_mask is a subset of
+ * req_mask, so we don't skip it.
+ * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will
+ * fit. And since a mask is a subset of itself, we will take the first "if"
+ * below.
+ */
+static struct mlx5_irq *
+irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ int start = pool->xa_num_irqs.min;
+ int end = pool->xa_num_irqs.max;
+ struct mlx5_irq *irq = NULL;
+ struct mlx5_irq *iter;
+ int irq_refcount = 0;
+ unsigned long index;
+
+ lockdep_assert_held(&pool->lock);
+ xa_for_each_range(&pool->irqs, index, iter, start, end) {
+ struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter);
+ int iter_refcount = mlx5_irq_read_locked(iter);
+
+ if (!cpumask_subset(iter_mask, req_mask))
+ /* skip IRQs with a mask which is not a subset of req_mask */
+ continue;
+ if (iter_refcount < pool->min_threshold)
+ /* If we found an IRQ with less than min_thres, return it */
+ return iter;
+ if (!irq || iter_refcount < irq_refcount) {
+ /* In case we don't find an IRQ with less than min_thres,
+ * keep a pointer to the least used IRQ
+ */
+ irq_refcount = iter_refcount;
+ irq = iter;
+ }
+ }
+ return irq;
+}
+
+/**
+ * mlx5_irq_affinity_request - request an IRQ according to the given mask.
+ * @pool: IRQ pool to request from.
+ * @req_mask: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ struct mlx5_irq *least_loaded_irq, *new_irq;
+
+ mutex_lock(&pool->lock);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+ if (least_loaded_irq &&
+ mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
+ goto out;
+ /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
+ new_irq = irq_pool_request_irq(pool, req_mask);
+ if (IS_ERR(new_irq)) {
+ if (!least_loaded_irq) {
+ /* We failed to create a new IRQ and didn't find an existing one to share */
+ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
+ PTR_ERR(new_irq));
+ mutex_unlock(&pool->lock);
+ return new_irq;
+ }
+ /* We failed to create a new IRQ for the requested affinity,
+ * share an existing IRQ instead.
+ */
+ goto out;
+ }
+ least_loaded_irq = new_irq;
+ goto unlock;
+out:
+ mlx5_irq_get_locked(least_loaded_irq);
+ if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold)
+ mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+ pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(least_loaded_irq)), pool->name,
+ mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
+unlock:
+ mutex_unlock(&pool->lock);
+ return least_loaded_irq;
+}
+
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ int i;
+
+ for (i = 0; i < num_irqs; i++) {
+ int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i]));
+
+ synchronize_irq(pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(irqs[i])));
+ if (mlx5_irq_put(irqs[i]))
+ if (pool->irqs_per_cpu)
+ cpu_put(pool, cpu);
+ }
+}
+
+/**
+ * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bound to at most one CPU.
+ * This function requests IRQs according to the default assignment policy:
+ * - in each iteration, request the least loaded IRQ which is not bound to any
+ * CPU already used by the previously requested IRQs.
+ *
+ * This function returns the number of IRQs requested, (which might be smaller than
+ * @nirqs), if successful, or a negative error code in case of an error.
+ */
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i = 0;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_copy(req_mask, cpu_online_mask);
+ for (i = 0; i < nirqs; i++) {
+ if (mlx5_irq_pool_is_sf_pool(pool))
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ else
+ /* In case the SF pool doesn't exist, fall back to the PF IRQs.
+ * The PF IRQs are already allocated and bound to CPUs
+ * at this point. Hence, only an index is needed.
+ */
+ irq = mlx5_irq_request(dev, i, NULL);
+ if (IS_ERR(irq))
+ break;
+ irqs[i] = irq;
+ cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+ mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+ cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+ mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
+ }
+ free_cpumask_var(req_mask);
+ if (!i)
+ return PTR_ERR(irq);
+ return i;
+}
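+
+/* Note: after each successful request the first CPU of the new IRQ's
+ * affinity mask is cleared from req_mask, so subsequent iterations spread
+ * the remaining IRQs over CPUs not yet used by this device.
+ */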
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
new file mode 100644
index 000000000..b8feaf0f5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lag.h"
+
+static char *get_str_mode_type(struct mlx5_lag *ldev)
+{
+ switch (ldev->mode) {
+ case MLX5_LAG_MODE_ROCE: return "roce";
+ case MLX5_LAG_MODE_SRIOV: return "switchdev";
+ case MLX5_LAG_MODE_MULTIPATH: return "multipath";
+ case MLX5_LAG_MODE_MPESW: return "multiport_eswitch";
+ default: return "invalid";
+ }
+}
+
+static int type_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ char *mode = NULL;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = get_str_mode_type(ldev);
+ mutex_unlock(&ldev->lock);
+ if (!mode)
+ return -EINVAL;
+ seq_printf(file, "%s\n", mode);
+
+ return 0;
+}
+
+static int port_sel_mode_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int ret = 0;
+ char *mode;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ if (__mlx5_lag_is_active(ldev))
+ mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags);
+ else
+ ret = -EINVAL;
+ mutex_unlock(&ldev->lock);
+ if (ret)
+ return ret;
+
+ seq_printf(file, "%s\n", mode);
+ return 0;
+}
+
+static int state_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ bool active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+ seq_printf(file, "%s\n", active ? "active" : "disabled");
+ return 0;
+}
+
+static int flags_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ bool fdb_sel_mode_native;
+ struct mlx5_lag *ldev;
+ bool shared_fdb;
+ bool lag_active;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (!lag_active)
+ goto unlock;
+
+ shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &ldev->mode_flags);
+
+unlock:
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off");
+ seq_printf(file, "%s:%s\n", "fdb_selection_mode",
+ fdb_sel_mode_native ? "native" : "affinity");
+ return 0;
+}
+
+static int mapping_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ u8 ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_lag *ldev;
+ bool hash = false;
+ bool lag_active;
+ int num_ports;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ lag_active = __mlx5_lag_is_active(ldev);
+ if (lag_active) {
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
+ mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
+ &num_ports);
+ hash = true;
+ } else {
+ for (i = 0; i < ldev->ports; i++)
+ ports[i] = ldev->v2p_map[i];
+ num_ports = ldev->ports;
+ }
+ }
+ mutex_unlock(&ldev->lock);
+ if (!lag_active)
+ return -EINVAL;
+
+ for (i = 0; i < num_ports; i++) {
+ if (hash)
+ seq_printf(file, "%d\n", ports[i] + 1);
+ else
+ seq_printf(file, "%d:%d\n", i + 1, ports[i]);
+ }
+
+ return 0;
+}
+
+static int members_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_core_dev *dev = file->private;
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = dev->priv.lag;
+ mutex_lock(&ldev->lock);
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+ seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device));
+ }
+ mutex_unlock(&ldev->lock);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(type);
+DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
+DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(flags);
+DEFINE_SHOW_ATTRIBUTE(mapping);
+DEFINE_SHOW_ATTRIBUTE(members);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
+{
+ struct dentry *dbg;
+
+ dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
+ dev->priv.dbg.lag_debugfs = dbg;
+
+ debugfs_create_file("type", 0444, dbg, dev, &type_fops);
+ debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops);
+ debugfs_create_file("state", 0444, dbg, dev, &state_fops);
+ debugfs_create_file("flags", 0444, dbg, dev, &flags_fops);
+ debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops);
+ debugfs_create_file("members", 0444, dbg, dev, &members_fops);
+}
+
+void mlx5_ldev_remove_debugfs(struct dentry *dbg)
+{
+ debugfs_remove_recursive(dbg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
new file mode 100644
index 000000000..ad32b80e8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -0,0 +1,1580 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <net/bonding.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "lag.h"
+#include "mp.h"
+#include "mpesw.h"
+
+enum {
+ MLX5_LAG_EGRESS_PORT_1 = 1,
+ MLX5_LAG_EGRESS_PORT_2,
+};
+
+/* General purpose, use for short periods of time.
+ * Beware of lock dependencies (preferably, no locks should be acquired
+ * under it).
+ */
+static DEFINE_SPINLOCK(lag_lock);
+
+static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
+{
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
+
+ return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
+}
+
+static u8 lag_active_port_bits(struct mlx5_lag *ldev)
+{
+ u8 enabled_ports[MLX5_MAX_PORTS] = {};
+ u8 active_port = 0;
+ int num_enabled;
+ int idx;
+
+ mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
+ &num_enabled);
+ for (idx = 0; idx < num_enabled; idx++)
+ active_port |= BIT_MASK(enabled_ports[idx]);
+
+ return active_port;
+}
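+
+/* Example: if ports 1 and 2 (indices 0 and 1) are tx-enabled and up, the
+ * returned bitmap is 0x3; it is used to program the lagc active_port field
+ * for hash-based (port selection FT) LAG.
+ */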
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
+ unsigned long flags)
+{
+ bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &flags);
+ int port_sel_mode = get_port_sel_mode(mode, flags);
+ u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
+ void *lag_ctx;
+
+ lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+ MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
+
+ switch (port_sel_mode) {
+ case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
+ break;
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
+ if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
+ break;
+
+ MLX5_SET(lagc, lag_ctx, active_port,
+ lag_active_port_bits(mlx5_lag_dev(dev)));
+ break;
+ default:
+ break;
+ }
+ MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
+
+ return mlx5_cmd_exec_in(dev, create_lag, in);
+}
+
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
+ u8 *ports)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
+ void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+ MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+ MLX5_SET(modify_lag_in, in, field_select, 0x1);
+
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
+
+ return mlx5_cmd_exec_in(dev, modify_lag, in);
+}
+
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
+
+ MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
+
+ return mlx5_cmd_exec_in(dev, create_vport_lag, in);
+}
+EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
+
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
+
+ MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
+
+ return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
+}
+EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+
+static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_disabled)
+{
+ int i;
+
+ *num_disabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (!tracker->netdev_state[i].tx_enabled ||
+ !tracker->netdev_state[i].link_up)
+ ports[(*num_disabled)++] = i;
+ }
+}
+
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled)
+{
+ int i;
+
+ *num_enabled = 0;
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ ports[(*num_enabled)++] = i;
+ }
+
+ if (*num_enabled == 0)
+ mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
+}
+
+static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
+ struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ unsigned long flags)
+{
+ char buf[MLX5_MAX_PORTS * 10 + 1] = {};
+ u8 enabled_ports[MLX5_MAX_PORTS] = {};
+ int written = 0;
+ int num_enabled;
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
+ mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
+ &num_enabled);
+ for (i = 0; i < num_enabled; i++) {
+ err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
+ if (err != 3)
+ return;
+ written += err;
+ }
+ buf[written - 2] = 0;
+ mlx5_core_info(dev, "lag map active ports: %s\n", buf);
+ } else {
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ err = scnprintf(buf + written, 10,
+ " port %d:%d", i + 1, ldev->v2p_map[idx]);
+ if (err != 9)
+ return;
+ written += err;
+ }
+ }
+ mlx5_core_info(dev, "lag map:%s\n", buf);
+ }
+}
+
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr);
+static void mlx5_do_bond_work(struct work_struct *work);
+
+static void mlx5_ldev_free(struct kref *ref)
+{
+ struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
+
+ if (ldev->nb.notifier_call)
+ unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+ mlx5_lag_mp_cleanup(ldev);
+ cancel_delayed_work_sync(&ldev->bond_work);
+ destroy_workqueue(ldev->wq);
+ mlx5_lag_mpesw_cleanup(ldev);
+ mutex_destroy(&ldev->lock);
+ kfree(ldev);
+}
+
+static void mlx5_ldev_put(struct mlx5_lag *ldev)
+{
+ kref_put(&ldev->ref, mlx5_ldev_free);
+}
+
+static void mlx5_ldev_get(struct mlx5_lag *ldev)
+{
+ kref_get(&ldev->ref);
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ int err;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev)
+ return NULL;
+
+ ldev->wq = create_singlethread_workqueue("mlx5_lag");
+ if (!ldev->wq) {
+ kfree(ldev);
+ return NULL;
+ }
+
+ kref_init(&ldev->ref);
+ mutex_init(&ldev->lock);
+ INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+
+ ldev->nb.notifier_call = mlx5_lag_netdev_event;
+ if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
+ ldev->nb.notifier_call = NULL;
+ mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+ }
+ ldev->mode = MLX5_LAG_MODE_NONE;
+
+ err = mlx5_lag_mp_init(ldev);
+ if (err)
+ mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+ err);
+
+ mlx5_lag_mpesw_init(ldev);
+ ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
+ ldev->buckets = 1;
+
+ return ldev;
+}
+
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].netdev == ndev)
+ return i;
+
+ return -ENOENT;
+}
+
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
+{
+ return ldev->mode == MLX5_LAG_MODE_ROCE;
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+ return ldev->mode == MLX5_LAG_MODE_SRIOV;
+}
+
+/* Create a mapping between steering slots and active ports.
+ * As we have ldev->buckets slots per port, first assume the native
+ * mapping should be used.
+ * If there are disabled ports, fill the relevant slots
+ * with a mapping that points to active ports.
+ */
+static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+ u8 num_ports,
+ u8 buckets,
+ u8 *ports)
+{
+ int disabled[MLX5_MAX_PORTS] = {};
+ int enabled[MLX5_MAX_PORTS] = {};
+ int disabled_ports_num = 0;
+ int enabled_ports_num = 0;
+ int idx;
+ u32 rand;
+ int i;
+ int j;
+
+ for (i = 0; i < num_ports; i++) {
+ if (tracker->netdev_state[i].tx_enabled &&
+ tracker->netdev_state[i].link_up)
+ enabled[enabled_ports_num++] = i;
+ else
+ disabled[disabled_ports_num++] = i;
+ }
+
+ /* Use native mapping by default where each port's buckets
+ * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+ */
+ for (i = 0; i < num_ports; i++)
+ for (j = 0; j < buckets; j++) {
+ idx = i * buckets + j;
+ ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
+ }
+
+ /* If all ports are disabled/enabled keep native mapping */
+ if (enabled_ports_num == num_ports ||
+ disabled_ports_num == num_ports)
+ return;
+
+ /* Go over the disabled ports and for each assign a random active port */
+ for (i = 0; i < disabled_ports_num; i++) {
+ for (j = 0; j < buckets; j++) {
+ get_random_bytes(&rand, 4);
+ ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+ }
+ }
+}
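+
+/* Example: with num_ports = 2, buckets = 1 and port 2 down, the native map
+ * {1, 2} becomes {1, 1}; with buckets > 1, every slot of a disabled port is
+ * filled with a randomly chosen enabled port to spread the traffic.
+ */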
+
+static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].has_drop)
+ return true;
+ return false;
+}
+
+static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].has_drop)
+ continue;
+
+ mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
+ MLX5_VPORT_UPLINK);
+ ldev->pf[i].has_drop = false;
+ }
+}
+
+static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ u8 disabled_ports[MLX5_MAX_PORTS] = {};
+ struct mlx5_core_dev *dev;
+ int disabled_index;
+ int num_disabled;
+ int err;
+ int i;
+
+ /* First delete the current drop rule so there won't be any dropped
+ * packets
+ */
+ mlx5_lag_drop_rule_cleanup(ldev);
+
+ if (!ldev->tracker.has_inactive)
+ return;
+
+ mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
+
+ for (i = 0; i < num_disabled; i++) {
+ disabled_index = disabled_ports[i];
+ dev = ldev->pf[disabled_index].dev;
+ err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
+ MLX5_VPORT_UPLINK);
+ if (!err)
+ ldev->pf[disabled_index].has_drop = true;
+ else
+ mlx5_core_err(dev,
+ "Failed to create lag drop rule, error: %d", err);
+ }
+}
+
+static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
+ void *lag_ctx;
+
+ lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+ MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+ MLX5_SET(modify_lag_in, in, field_select, 0x2);
+
+ MLX5_SET(lagc, lag_ctx, active_port, ports);
+
+ return mlx5_cmd_exec_in(dev, modify_lag, in);
+}
+
+static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ u8 active_ports;
+ int ret;
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
+ ret = mlx5_lag_port_sel_modify(ldev, ports);
+ if (ret ||
+ !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
+ return ret;
+
+ active_ports = lag_active_port_bits(ldev);
+
+ return mlx5_cmd_modify_active_port(dev0, active_ports);
+ }
+ return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
+}
+
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
+
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ports[idx] == ldev->v2p_map[idx])
+ continue;
+ err = _mlx5_modify_lag(ldev, ports);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ return;
+ }
+ memcpy(ldev->v2p_map, ports, sizeof(ports));
+
+ mlx5_lag_print_mapping(dev0, ldev, tracker,
+ ldev->mode_flags);
+ break;
+ }
+ }
+
+ if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ !(ldev->mode == MLX5_LAG_MODE_ROCE))
+ mlx5_lag_drop_rule_setup(ldev, tracker);
+}
+
+static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
+ unsigned long *flags)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+
+ if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
+ if (ldev->ports > 2)
+ return -EINVAL;
+ return 0;
+ }
+
+ if (ldev->ports > 2)
+ ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
+
+ set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
+
+ return 0;
+}
+
+static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ unsigned long *flags)
+{
+ struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ return;
+
+ if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
+ set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
+}
+
+static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
+ struct lag_tracker *tracker, bool shared_fdb,
+ unsigned long *flags)
+{
+ bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
+
+ *flags = 0;
+ if (shared_fdb) {
+ set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+ }
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+
+ if (roce_lag)
+ return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
+
+ mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
+ return 0;
+}
+
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
+{
+ int port_sel_mode = get_port_sel_mode(mode, flags);
+
+ switch (port_sel_mode) {
+ case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
+ case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
+ default: return "invalid";
+ }
+}
+
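+/* Send CREATE_LAG to the firmware and, when shared FDB was requested,
+ * point both eswitches at a single FDB. On failure the newly created
+ * LAG object is destroyed again.
+ */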
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ unsigned long flags)
+{
+ bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
+ int err;
+
+ if (tracker)
+ mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
+ mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
+ shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
+
+ err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to create LAG (%d)\n",
+ err);
+ return err;
+ }
+
+ if (shared_fdb) {
+ err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ if (err)
+ mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+ else
+ mlx5_core_info(dev0, "Operation mode is single FDB\n");
+ }
+
+ if (err) {
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ }
+
+ return err;
+}
+
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ bool shared_fdb)
+{
+ bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ unsigned long flags = 0;
+ int err;
+
+ err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
+ if (err)
+ return err;
+
+ if (mode != MLX5_LAG_MODE_MPESW) {
+ mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
+ err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
+ ldev->v2p_map);
+ if (err) {
+ mlx5_core_err(dev0,
+ "Failed to create LAG port selection(%d)\n",
+ err);
+ return err;
+ }
+ }
+ }
+
+ err = mlx5_create_lag(ldev, tracker, mode, flags);
+ if (err) {
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ mlx5_lag_port_sel_destroy(ldev);
+ if (roce_lag)
+ mlx5_core_err(dev0,
+ "Failed to activate RoCE LAG\n");
+ else
+ mlx5_core_err(dev0,
+ "Failed to activate VF LAG\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ return err;
+ }
+
+ if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ !roce_lag)
+ mlx5_lag_drop_rule_setup(ldev, tracker);
+
+ ldev->mode = mode;
+ ldev->mode_flags = flags;
+ return 0;
+}
+
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
+ bool roce_lag = __mlx5_lag_is_roce(ldev);
+ unsigned long flags = ldev->mode_flags;
+ int err;
+
+ ldev->mode = MLX5_LAG_MODE_NONE;
+ ldev->mode_flags = 0;
+ mlx5_lag_mp_reset(ldev);
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
+ mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
+ }
+
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to deactivate VF LAG; driver restart required\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ return err;
+ }
+
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ mlx5_lag_port_sel_destroy(ldev);
+ if (mlx5_lag_has_drop_rule(ldev))
+ mlx5_lag_drop_rule_cleanup(ldev);
+
+ return 0;
+}
+
+#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
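+/* All PFs must be present and run the same eswitch mode before bonding;
+ * offloads mode supports exactly two ports.
+ */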
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+#ifdef CONFIG_MLX5_ESWITCH
+ struct mlx5_core_dev *dev;
+ u8 mode;
+#endif
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (!ldev->pf[i].dev)
+ return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ for (i = 0; i < ldev->ports; i++) {
+ dev = ldev->pf[i].dev;
+ if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
+ return false;
+ }
+
+ dev = ldev->pf[MLX5_LAG_P1].dev;
+ mode = mlx5_eswitch_mode(dev);
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
+ return false;
+
+ if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
+ return false;
+#else
+ for (i = 0; i < ldev->ports; i++)
+ if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
+ return false;
+#endif
+ return true;
+}
+
+static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
+ ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(ldev->pf[i].dev);
+ }
+}
+
+static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (!ldev->pf[i].dev)
+ continue;
+
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
+ ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(ldev->pf[i].dev);
+ }
+}
+
+void mlx5_disable_lag(struct mlx5_lag *ldev)
+{
+ bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ bool roce_lag;
+ int err;
+ int i;
+
+ roce_lag = __mlx5_lag_is_roce(ldev);
+
+ if (shared_fdb) {
+ mlx5_lag_remove_devices(ldev);
+ } else if (roce_lag) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ }
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
+ }
+
+ err = mlx5_deactivate_lag(ldev);
+ if (err)
+ return;
+
+ if (shared_fdb || roce_lag)
+ mlx5_lag_add_devices(ldev);
+
+ if (shared_fdb) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ }
+}
+
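+/* Shared FDB needs both PFs in switchdev mode with vport match metadata
+ * enabled, a paired devcom channel and the relevant firmware capabilities.
+ */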
+bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+ if (is_mdev_switchdev_mode(dev0) &&
+ is_mdev_switchdev_mode(dev1) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+ mlx5_devcom_is_paired(dev0->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS) &&
+ MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+ MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+ MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+ return true;
+
+ return false;
+}
+
+static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
+{
+ bool roce_lag = true;
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
+
+#ifdef CONFIG_MLX5_ESWITCH
+ for (i = 0; i < ldev->ports; i++)
+ roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
+#endif
+
+ return roce_lag;
+}
+
+static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
+{
+ return do_bond && __mlx5_lag_is_active(ldev) &&
+ ldev->mode != MLX5_LAG_MODE_MPESW;
+}
+
+static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
+{
+ return !do_bond && __mlx5_lag_is_active(ldev) &&
+ ldev->mode != MLX5_LAG_MODE_MPESW;
+}
+
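+/* Re-evaluate the bond state reported by the netdev tracker and
+ * activate, modify or disable the hardware LAG accordingly.
+ */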
+static void mlx5_do_bond(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ struct lag_tracker tracker = { };
+ bool do_bond, roce_lag;
+ int err;
+ int i;
+
+ if (!mlx5_lag_is_ready(ldev)) {
+ do_bond = false;
+ } else {
+ /* VF LAG is in multipath mode, ignore bond change requests */
+ if (mlx5_lag_is_multipath(dev0))
+ return;
+
+ tracker = ldev->tracker;
+
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ }
+
+ if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
+ roce_lag = mlx5_lag_is_roce_lag(ldev);
+
+ if (shared_fdb || roce_lag)
+ mlx5_lag_remove_devices(ldev);
+
+ err = mlx5_activate_lag(ldev, &tracker,
+ roce_lag ? MLX5_LAG_MODE_ROCE :
+ MLX5_LAG_MODE_SRIOV,
+ shared_fdb);
+ if (err) {
+ if (shared_fdb || roce_lag)
+ mlx5_lag_add_devices(ldev);
+
+ return;
+ } else if (roce_lag) {
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ for (i = 1; i < ldev->ports; i++)
+ mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
+ } else if (shared_fdb) {
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+
+ err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!err)
+ err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+ if (err) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ mlx5_deactivate_lag(ldev);
+ mlx5_lag_add_devices(ldev);
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ mlx5_core_err(dev0, "Failed to enable lag\n");
+ return;
+ }
+ }
+ } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
+ mlx5_modify_lag(ldev, &tracker);
+ } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
+ mlx5_disable_lag(ldev);
+ }
+}
+
+static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
+{
+ queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
+}
+
+static void mlx5_do_bond_work(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
+ bond_work);
+ int status;
+
+ status = mlx5_dev_list_trylock();
+ if (!status) {
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_do_bond(ldev);
+ mutex_unlock(&ldev->lock);
+ mlx5_dev_list_unlock();
+}
+
+static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *upper = info->upper_dev, *ndev_tmp;
+ struct netdev_lag_upper_info *lag_upper_info = NULL;
+ bool is_bonded, is_in_lag, mode_supported;
+ bool has_inactive = false;
+ struct slave *slave;
+ u8 bond_status = 0;
+ int num_slaves = 0;
+ int changed = 0;
+ int idx;
+
+ if (!netif_is_lag_master(upper))
+ return 0;
+
+ if (info->linking)
+ lag_upper_info = info->upper_info;
+
+ /* The event may still be of interest if the slave does not belong to
+ * us, but is enslaved to a master which has one or more of our netdevs
+ * as slaves (e.g., if a new slave is added to a master that bonds two
+ * of our netdevs, we should unbond).
+ */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+ if (idx >= 0) {
+ slave = bond_slave_get_rcu(ndev_tmp);
+ if (slave)
+ has_inactive |= bond_is_slave_inactive(slave);
+ bond_status |= (1 << idx);
+ }
+
+ num_slaves++;
+ }
+ rcu_read_unlock();
+
+ /* None of this lagdev's netdevs are slaves of this master. */
+ if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
+ return 0;
+
+ if (lag_upper_info) {
+ tracker->tx_type = lag_upper_info->tx_type;
+ tracker->hash_type = lag_upper_info->hash_type;
+ }
+
+ tracker->has_inactive = has_inactive;
+ /* Determine bonding status:
+ * A device is considered bonded if all of its physical ports are
+ * slaves of the same lag master, and that master has no other slaves.
+ */
+ is_in_lag = num_slaves == ldev->ports &&
+ bond_status == GENMASK(ldev->ports - 1, 0);
+
+ /* Lag mode must be activebackup or hash. */
+ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
+
+ is_bonded = is_in_lag && mode_supported;
+ if (tracker->is_bonded != is_bonded) {
+ tracker->is_bonded = is_bonded;
+ changed = 1;
+ }
+
+ if (!is_in_lag)
+ return changed;
+
+ if (!mlx5_lag_is_ready(ldev))
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, PF is configured with more than 64 VFs");
+ else if (!mode_supported)
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, TX type isn't supported");
+
+ return changed;
+}
+
+static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct netdev_lag_lower_state_info *lag_lower_info;
+ int idx;
+
+ if (!netif_is_lag_port(ndev))
+ return 0;
+
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
+ if (idx < 0)
+ return 0;
+
+ /* This information is used to determine virtual to physical
+ * port mapping.
+ */
+ lag_lower_info = info->lower_state_info;
+ if (!lag_lower_info)
+ return 0;
+
+ tracker->netdev_state[idx] = *lag_lower_info;
+
+ return 1;
+}
+
+static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev)
+{
+ struct net_device *ndev_tmp;
+ struct slave *slave;
+ bool has_inactive = false;
+ int idx;
+
+ if (!netif_is_lag_master(ndev))
+ return 0;
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+ if (idx < 0)
+ continue;
+
+ slave = bond_slave_get_rcu(ndev_tmp);
+ if (slave)
+ has_inactive |= bond_is_slave_inactive(slave);
+ }
+ rcu_read_unlock();
+
+ if (tracker->has_inactive == has_inactive)
+ return 0;
+
+ tracker->has_inactive = has_inactive;
+
+ return 1;
+}
+
+/* this handler is always registered to netdev events */
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct lag_tracker tracker;
+ struct mlx5_lag *ldev;
+ int changed = 0;
+
+ if (event != NETDEV_CHANGEUPPER &&
+ event != NETDEV_CHANGELOWERSTATE &&
+ event != NETDEV_CHANGEINFODATA)
+ return NOTIFY_DONE;
+
+ ldev = container_of(this, struct mlx5_lag, nb);
+
+ tracker = ldev->tracker;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
+ ndev, ptr);
+ break;
+ case NETDEV_CHANGEINFODATA:
+ changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
+ break;
+ }
+
+ ldev->tracker = tracker;
+
+ if (changed)
+ mlx5_queue_bond_work(ldev, 0);
+
+ return NOTIFY_DONE;
+}
+
+static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ unsigned int fn = mlx5_get_dev_index(dev);
+ unsigned long flags;
+
+ if (fn >= ldev->ports)
+ return;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev->pf[fn].netdev = netdev;
+ ldev->tracker.netdev_state[fn].link_up = 0;
+ ldev->tracker.netdev_state[fn].tx_enabled = 0;
+ spin_unlock_irqrestore(&lag_lock, flags);
+}
+
+static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
+ struct net_device *netdev)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ for (i = 0; i < ldev->ports; i++) {
+ if (ldev->pf[i].netdev == netdev) {
+ ldev->pf[i].netdev = NULL;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&lag_lock, flags);
+}
+
+static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
+{
+ unsigned int fn = mlx5_get_dev_index(dev);
+
+ if (fn >= ldev->ports)
+ return;
+
+ ldev->pf[fn].dev = dev;
+ dev->priv.lag = ldev;
+}
+
+static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->pf[i].dev == dev)
+ break;
+
+ if (i == ldev->ports)
+ return;
+
+ ldev->pf[i].dev = NULL;
+ dev->priv.lag = NULL;
+}
+
+/* Must be called with intf_mutex held */
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = NULL;
+ struct mlx5_core_dev *tmp_dev;
+
+ tmp_dev = mlx5_get_next_phys_dev_lag(dev);
+ if (tmp_dev)
+ ldev = tmp_dev->priv.lag;
+
+ if (!ldev) {
+ ldev = mlx5_lag_dev_alloc(dev);
+ if (!ldev) {
+ mlx5_core_err(dev, "Failed to alloc lag dev\n");
+ return 0;
+ }
+ mlx5_ldev_add_mdev(ldev, dev);
+ return 0;
+ }
+
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ return -EAGAIN;
+ }
+ mlx5_ldev_get(ldev);
+ mlx5_ldev_add_mdev(ldev, dev);
+ mutex_unlock(&ldev->lock);
+
+ return 0;
+}
+
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ /* mdev is being removed, might as well remove debugfs
+ * as early as possible.
+ */
+ mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
+recheck:
+ mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mutex_unlock(&ldev->lock);
+ msleep(100);
+ goto recheck;
+ }
+ mlx5_ldev_remove_mdev(ldev, dev);
+ mutex_unlock(&ldev->lock);
+ mlx5_ldev_put(ldev);
+}
+
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
+ MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
+ return;
+
+recheck:
+ mlx5_dev_list_lock();
+ err = __mlx5_lag_dev_add_mdev(dev);
+ mlx5_dev_list_unlock();
+
+ if (err) {
+ msleep(100);
+ goto recheck;
+ }
+ mlx5_ldev_add_debugfs(dev);
+}
+
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ struct mlx5_lag *ldev;
+ bool lag_is_active;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ mlx5_ldev_remove_netdev(ldev, netdev);
+ clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+
+ lag_is_active = __mlx5_lag_is_active(ldev);
+ mutex_unlock(&ldev->lock);
+
+ if (lag_is_active)
+ mlx5_queue_bond_work(ldev, 0);
+}
+
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ mlx5_ldev_add_netdev(ldev, dev, netdev);
+
+ for (i = 0; i < ldev->ports; i++)
+ if (!ldev->pf[i].netdev)
+ break;
+
+ if (i >= ldev->ports)
+ set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+ mutex_unlock(&ldev->lock);
+ mlx5_queue_bond_work(ldev, 0);
+}
+
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_roce(ldev);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_roce);
+
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_active);
+
+bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res = false;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev)
+ res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
+
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev) &&
+ dev == ldev->pf[MLX5_LAG_P1].dev;
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
+
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev) &&
+ test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mlx5_dev_list_lock();
+ mutex_lock(&ldev->lock);
+
+ ldev->mode_changes_in_progress++;
+ if (__mlx5_lag_is_active(ldev))
+ mlx5_disable_lag(ldev);
+
+ mutex_unlock(&ldev->lock);
+ mlx5_dev_list_unlock();
+}
+
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ mutex_lock(&ldev->lock);
+ ldev->mode_changes_in_progress--;
+ mutex_unlock(&ldev->lock);
+ mlx5_queue_bond_work(ldev, 0);
+}
+
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+{
+ struct net_device *ndev = NULL;
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
+ goto unlock;
+
+ if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ for (i = 0; i < ldev->ports; i++)
+ if (ldev->tracker.netdev_state[i].tx_enabled)
+ ndev = ldev->pf[i].netdev;
+ if (!ndev)
+ ndev = ldev->pf[ldev->ports - 1].netdev;
+ } else {
+ ndev = ldev->pf[MLX5_LAG_P1].netdev;
+ }
+ if (ndev)
+ dev_hold(ndev);
+
+unlock:
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return ndev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+ struct net_device *slave)
+{
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ u8 port = 0;
+ int i;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
+ goto unlock;
+
+ for (i = 0; i < ldev->ports; i++) {
+ if (ldev->pf[i].netdev == slave) {
+ port = i;
+ break;
+ }
+ }
+
+ port = ldev->v2p_map[port * ldev->buckets];
+
+unlock:
+ spin_unlock_irqrestore(&lag_lock, flags);
+ return port;
+}
+EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+
+u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return 0;
+
+ return ldev->ports;
+}
+EXPORT_SYMBOL(mlx5_lag_get_num_ports);
+
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev = NULL;
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ goto unlock;
+
+ peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
+ ldev->pf[MLX5_LAG_P2].dev :
+ ldev->pf[MLX5_LAG_P1].dev;
+
+unlock:
+ spin_unlock_irqrestore(&lag_lock, flags);
+ return peer_dev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ u64 *values,
+ int num_counters,
+ size_t *offsets)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ struct mlx5_core_dev **mdev;
+ struct mlx5_lag *ldev;
+ unsigned long flags;
+ int num_ports;
+ int ret, i, j;
+ void *out;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
+ if (!mdev) {
+ ret = -ENOMEM;
+ goto free_out;
+ }
+
+ memset(values, 0, sizeof(*values) * num_counters);
+
+ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev && __mlx5_lag_is_active(ldev)) {
+ num_ports = ldev->ports;
+ for (i = 0; i < ldev->ports; i++)
+ mdev[i] = ldev->pf[i].dev;
+ } else {
+ num_ports = 1;
+ mdev[MLX5_LAG_P1] = dev;
+ }
+ spin_unlock_irqrestore(&lag_lock, flags);
+
+ for (i = 0; i < num_ports; ++i) {
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
+ out);
+ if (ret)
+ goto free_mdev;
+
+ for (j = 0; j < num_counters; ++j)
+ values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+ }
+
+free_mdev:
+ kvfree(mdev);
+free_out:
+ kvfree(out);
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
new file mode 100644
index 000000000..f30ac2de6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_H__
+#define __MLX5_LAG_H__
+
+#include <linux/debugfs.h>
+
+#define MLX5_LAG_MAX_HASH_BUCKETS 16
+#include "mlx5_core.h"
+#include "mp.h"
+#include "port_sel.h"
+#include "mpesw.h"
+
+enum {
+ MLX5_LAG_P1,
+ MLX5_LAG_P2,
+};
+
+enum {
+ MLX5_LAG_FLAG_NDEVS_READY,
+};
+
+enum {
+ MLX5_LAG_MODE_FLAG_HASH_BASED,
+ MLX5_LAG_MODE_FLAG_SHARED_FDB,
+ MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+};
+
+enum mlx5_lag_mode {
+ MLX5_LAG_MODE_NONE,
+ MLX5_LAG_MODE_ROCE,
+ MLX5_LAG_MODE_SRIOV,
+ MLX5_LAG_MODE_MULTIPATH,
+ MLX5_LAG_MODE_MPESW,
+};
+
+struct lag_func {
+ struct mlx5_core_dev *dev;
+ struct net_device *netdev;
+ bool has_drop;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+ enum netdev_lag_tx_type tx_type;
+ struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
+ unsigned int is_bonded:1;
+ unsigned int has_inactive:1;
+ enum netdev_lag_hash hash_type;
+};
+
+enum mpesw_op {
+ MLX5_MPESW_OP_ENABLE,
+ MLX5_MPESW_OP_DISABLE,
+};
+
+struct mlx5_mpesw_work_st {
+ struct work_struct work;
+ struct mlx5_lag *lag;
+ enum mpesw_op op;
+ struct completion comp;
+ int result;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves the card's physical functions (PFs).
+ */
+struct mlx5_lag {
+ enum mlx5_lag_mode mode;
+ unsigned long mode_flags;
+ unsigned long state_flags;
+ u8 ports;
+ u8 buckets;
+ int mode_changes_in_progress;
+ u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
+ struct kref ref;
+ struct lag_func pf[MLX5_MAX_PORTS];
+ struct lag_tracker tracker;
+ struct workqueue_struct *wq;
+ struct delayed_work bond_work;
+ struct notifier_block nb;
+ struct lag_mp lag_mp;
+ struct mlx5_lag_port_sel port_sel;
+ /* Protect lag fields/state changes */
+ struct mutex lock;
+ struct lag_mpesw lag_mpesw;
+};
+
+static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ MLX5_CAP_GEN(dev, num_lag_ports) < 2 ||
+ MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS)
+ return false;
+ return true;
+}
+
+static inline struct mlx5_lag *
+mlx5_lag_dev(struct mlx5_core_dev *dev)
+{
+ return dev->priv.lag;
+}
+
+static inline bool
+__mlx5_lag_is_active(struct mlx5_lag *ldev)
+{
+ return ldev->mode != MLX5_LAG_MODE_NONE;
+}
+
+static inline bool
+mlx5_lag_is_ready(struct mlx5_lag *ldev)
+{
+ return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+}
+
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker);
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ enum mlx5_lag_mode mode,
+ bool shared_fdb);
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev);
+bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev);
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
+
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags);
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+ u8 *ports, int *num_enabled);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
+void mlx5_ldev_remove_debugfs(struct dentry *dbg);
+void mlx5_disable_lag(struct mlx5_lag *ldev);
+
+#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
new file mode 100644
index 000000000..0259a149a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <net/nexthop.h>
+#include "lag/lag.h"
+#include "lag/mp.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+
+static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
+{
+ return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
+}
+
+static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
+{
+ if (!mlx5_lag_is_ready(ldev))
+ return false;
+
+ if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
+ return false;
+
+ return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
+ ldev->pf[MLX5_LAG_P2].dev);
+}
+
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_multipath(ldev);
+
+ return res;
+}
+
+/**
+ * mlx5_lag_set_port_affinity - set the TX port affinity of a multipath LAG
+ * @ldev: lag device
+ * @port: 0 - set normal affinity.
+ * 1 - set affinity to port 1.
+ * 2 - set affinity to port 2.
+ */
+static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
+ enum mlx5_lag_port_affinity port)
+{
+ struct lag_tracker tracker = {};
+
+ if (!__mlx5_lag_is_multipath(ldev))
+ return;
+
+ switch (port) {
+ case MLX5_LAG_NORMAL_AFFINITY:
+ tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+ tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+ tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+ tracker.netdev_state[MLX5_LAG_P2].link_up = true;
+ break;
+ case MLX5_LAG_P1_AFFINITY:
+ tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+ tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+ tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
+ tracker.netdev_state[MLX5_LAG_P2].link_up = false;
+ break;
+ case MLX5_LAG_P2_AFFINITY:
+ tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
+ tracker.netdev_state[MLX5_LAG_P1].link_up = false;
+ tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+ tracker.netdev_state[MLX5_LAG_P2].link_up = true;
+ break;
+ default:
+ mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+ "Invalid affinity port %d", port);
+ return;
+ }
+
+ if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
+ mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
+ MLX5_DEV_EVENT_PORT_AFFINITY,
+ (void *)0);
+
+ if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
+ mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
+ MLX5_DEV_EVENT_PORT_AFFINITY,
+ (void *)0);
+
+ mlx5_modify_lag(ldev, &tracker);
+}
+
+static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+{
+ struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+
+ flush_workqueue(mp->wq);
+}
+
+static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
+{
+ mp->fib.mfi = fi;
+ mp->fib.priority = fi->fib_priority;
+ mp->fib.dst = dst;
+ mp->fib.dst_len = dst_len;
+}
+
+struct mlx5_fib_event_work {
+ struct work_struct work;
+ struct mlx5_lag *ldev;
+ unsigned long event;
+ union {
+ struct fib_entry_notifier_info fen_info;
+ struct fib_nh_notifier_info fnh_info;
+ };
+};
+
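+/* Track IPv4 route add/replace/del events and update the port affinity
+ * to follow the route's next hop(s).
+ */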
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
+ struct fib_entry_notifier_info *fen_info)
+{
+ struct fib_info *fi = fen_info->fi;
+ struct lag_mp *mp = &ldev->lag_mp;
+ struct fib_nh *fib_nh0, *fib_nh1;
+ unsigned int nhs;
+
+ /* Handle delete event */
+ if (event == FIB_EVENT_ENTRY_DEL) {
+ /* stop tracking this fib entry */
+ if (mp->fib.mfi == fi)
+ mp->fib.mfi = NULL;
+ return;
+ }
+
+ /* Handle multipath entry with lower priority value */
+ if (mp->fib.mfi && mp->fib.mfi != fi &&
+ (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
+ fi->fib_priority >= mp->fib.priority)
+ return;
+
+ /* Handle add/replace event */
+ nhs = fib_info_num_path(fi);
+ if (nhs == 1) {
+ if (__mlx5_lag_is_active(ldev)) {
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct net_device *nh_dev = nh->fib_nh_dev;
+ int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
+
+ if (i < 0)
+ return;
+
+ i++;
+ mlx5_lag_set_port_affinity(ldev, i);
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
+ }
+
+ return;
+ }
+
+ if (nhs != 2)
+ return;
+
+ /* Verify that the next hops are ports of the same HCA */
+ fib_nh0 = fib_info_nh(fi, 0);
+ fib_nh1 = fib_info_nh(fi, 1);
+ if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
+ !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
+ mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+ "Multipath offload require two ports of the same HCA\n");
+ return;
+ }
+
+ /* First time we see a multipath route */
+ if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
+ struct lag_tracker tracker;
+
+ tracker = ldev->tracker;
+ mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
+ }
+
+ mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
+}
+
+static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+ unsigned long event,
+ struct fib_nh *fib_nh,
+ struct fib_info *fi)
+{
+ struct lag_mp *mp = &ldev->lag_mp;
+
+ /* Check that the nh event is related to the tracked route */
+ if (!mp->fib.mfi || mp->fib.mfi != fi)
+ return;
+
+ /* nh added/removed */
+ if (event == FIB_EVENT_NH_DEL) {
+ int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
+
+ if (i >= 0) {
+ i = (i + 1) % 2 + 1; /* peer port */
+ mlx5_lag_set_port_affinity(ldev, i);
+ }
+ } else if (event == FIB_EVENT_NH_ADD &&
+ fib_info_num_path(fi) == 2) {
+ mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
+ }
+}
+
+static void mlx5_lag_fib_update(struct work_struct *work)
+{
+ struct mlx5_fib_event_work *fib_work =
+ container_of(work, struct mlx5_fib_event_work, work);
+ struct mlx5_lag *ldev = fib_work->ldev;
+ struct fib_nh *fib_nh;
+
+ /* Protect internal structures from changes */
+ rtnl_lock();
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ mlx5_lag_fib_route_event(ldev, fib_work->event,
+ &fib_work->fen_info);
+ fib_info_put(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_NH_ADD:
+ case FIB_EVENT_NH_DEL:
+ fib_nh = fib_work->fnh_info.fib_nh;
+ mlx5_lag_fib_nexthop_event(ldev,
+ fib_work->event,
+ fib_work->fnh_info.fib_nh,
+ fib_nh->nh_parent);
+ fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ }
+
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static struct mlx5_fib_event_work *
+mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
+{
+ struct mlx5_fib_event_work *fib_work;
+
+ fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+ if (WARN_ON(!fib_work))
+ return NULL;
+
+ INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
+ fib_work->ldev = ldev;
+ fib_work->event = event;
+
+ return fib_work;
+}
+
+static int mlx5_lag_fib_event(struct notifier_block *nb,
+ unsigned long event,
+ void *ptr)
+{
+ struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+ struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+ struct fib_notifier_info *info = ptr;
+ struct mlx5_fib_event_work *fib_work;
+ struct fib_entry_notifier_info *fen_info;
+ struct fib_nh_notifier_info *fnh_info;
+ struct net_device *fib_dev;
+ struct fib_info *fi;
+
+ if (info->family != AF_INET)
+ return NOTIFY_DONE;
+
+ if (!mlx5_lag_multipath_check_prereq(ldev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ fen_info = container_of(info, struct fib_entry_notifier_info,
+ info);
+ fi = fen_info->fi;
+ if (fi->nh)
+ return NOTIFY_DONE;
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
+ fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
+ return NOTIFY_DONE;
+ }
+ fib_work = mlx5_lag_init_fib_work(ldev, event);
+ if (!fib_work)
+ return NOTIFY_DONE;
+ fib_work->fen_info = *fen_info;
+ /* Take a reference on the fib_info to prevent it from being
+ * freed while the work is queued; it is released once the
+ * work has run.
+ */
+ fib_info_hold(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_NH_ADD:
+ case FIB_EVENT_NH_DEL:
+ fnh_info = container_of(info, struct fib_nh_notifier_info,
+ info);
+ fib_work = mlx5_lag_init_fib_work(ldev, event);
+ if (!fib_work)
+ return NOTIFY_DONE;
+ fib_work->fnh_info = *fnh_info;
+ fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ queue_work(mp->wq, &fib_work->work);
+
+ return NOTIFY_DONE;
+}
+
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+{
+ /* Clear mfi, as it might become stale when a route delete event
+ * has been missed, see mlx5_lag_fib_route_event().
+ */
+ ldev->lag_mp.fib.mfi = NULL;
+}
+
+int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+{
+ struct lag_mp *mp = &ldev->lag_mp;
+ int err;
+
+ /* always clear mfi, as it might become stale when a route delete event
+ * has been missed
+ */
+ mp->fib.mfi = NULL;
+
+ if (mp->fib_nb.notifier_call)
+ return 0;
+
+ mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
+ if (!mp->wq)
+ return -ENOMEM;
+
+ mp->fib_nb.notifier_call = mlx5_lag_fib_event;
+ err = register_fib_notifier(&init_net, &mp->fib_nb,
+ mlx5_lag_fib_event_flush, NULL);
+ if (err) {
+ destroy_workqueue(mp->wq);
+ mp->fib_nb.notifier_call = NULL;
+ }
+
+ return err;
+}
+
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
+{
+ struct lag_mp *mp = &ldev->lag_mp;
+
+ if (!mp->fib_nb.notifier_call)
+ return;
+
+ unregister_fib_notifier(&init_net, &mp->fib_nb);
+ destroy_workqueue(mp->wq);
+ mp->fib_nb.notifier_call = NULL;
+ mp->fib.mfi = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
new file mode 100644
index 000000000..056a066da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_MP_H__
+#define __MLX5_LAG_MP_H__
+
+#include "lag.h"
+#include "mlx5_core.h"
+
+enum mlx5_lag_port_affinity {
+ MLX5_LAG_NORMAL_AFFINITY,
+ MLX5_LAG_P1_AFFINITY,
+ MLX5_LAG_P2_AFFINITY,
+};
+
+struct lag_mp {
+ struct notifier_block fib_nb;
+ struct {
+ const void *mfi; /* used in tracking fib events */
+ u32 priority;
+ u32 dst;
+ int dst_len;
+ } fib;
+ struct workqueue_struct *wq;
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
+int mlx5_lag_mp_init(struct mlx5_lag *ldev);
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {}
+static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
+static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
+static inline bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; }
+
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_LAG_MP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
new file mode 100644
index 000000000..c17e8f1ec
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <net/nexthop.h>
+#include "lag/lag.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+
+static int add_mpesw_rule(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ int err;
+
+ if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
+ return 0;
+
+ if (ldev->mode != MLX5_LAG_MODE_NONE) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ atomic_dec(&ldev->lag_mpesw.mpesw_rule_count);
+ return err;
+}
+
+static void del_mpesw_rule(struct mlx5_lag *ldev)
+{
+ if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
+ ldev->mode == MLX5_LAG_MODE_MPESW)
+ mlx5_disable_lag(ldev);
+}
+
+static void mlx5_mpesw_work(struct work_struct *work)
+{
+ struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work);
+ struct mlx5_lag *ldev = mpesww->lag;
+
+ mutex_lock(&ldev->lock);
+ if (mpesww->op == MLX5_MPESW_OP_ENABLE)
+ mpesww->result = add_mpesw_rule(ldev);
+ else if (mpesww->op == MLX5_MPESW_OP_DISABLE)
+ del_mpesw_rule(ldev);
+ mutex_unlock(&ldev->lock);
+
+ complete(&mpesww->comp);
+}
+
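+/* Queue an MPESW enable/disable request on the LAG workqueue and wait
+ * for its result.
+ */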
+static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev,
+ enum mpesw_op op)
+{
+ struct mlx5_lag *ldev = dev->priv.lag;
+ struct mlx5_mpesw_work_st *work;
+ int err = 0;
+
+ if (!ldev)
+ return 0;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ INIT_WORK(&work->work, mlx5_mpesw_work);
+ init_completion(&work->comp);
+ work->op = op;
+ work->lag = ldev;
+
+ if (!queue_work(ldev->wq, &work->work)) {
+ mlx5_core_warn(dev, "failed to queue mpesw work\n");
+ err = -EINVAL;
+ goto out;
+ }
+ wait_for_completion(&work->comp);
+ err = work->result;
+out:
+ kfree(work);
+ return err;
+}
+
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE);
+}
+
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE);
+}
+
+int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
+{
+ struct mlx5_lag *ldev = mdev->priv.lag;
+
+ if (!netif_is_bond_master(out_dev) || !ldev)
+ return 0;
+
+ if (ldev->mode == MLX5_LAG_MODE_MPESW)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
+{
+ bool ret;
+
+ ret = dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW;
+ return ret;
+}
+
+void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
+{
+ atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);
+}
+
+void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
+{
+ WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
new file mode 100644
index 000000000..88e8daffc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LAG_MPESW_H__
+#define __MLX5_LAG_MPESW_H__
+
+#include "lag.h"
+#include "mlx5_core.h"
+
+struct lag_mpesw {
+ struct work_struct mpesw_work;
+ atomic_t mpesw_rule_count;
+};
+
+int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
+bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+void mlx5_lag_mpesw_init(struct mlx5_lag *ldev);
+void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev);
+#else
+static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {}
+static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {}
+#endif
+
+#endif /* __MLX5_LAG_MPESW_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
new file mode 100644
index 000000000..7d9bbb494
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/netdevice.h>
+#include "lag.h"
+
+enum {
+ MLX5_LAG_FT_LEVEL_TTC,
+ MLX5_LAG_FT_LEVEL_INNER_TTC,
+ MLX5_LAG_FT_LEVEL_DEFINER,
+};
+
+static struct mlx5_flow_group *
+mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_definer *definer,
+ u8 rules)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(create_flow_group_in, in, match_definer_id,
+ mlx5_get_match_definer_id(definer));
+ MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1);
+ MLX5_SET(create_flow_group_in, in, group_type,
+ MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT);
+
+ fg = mlx5_create_flow_group(ft, in);
+ kvfree(in);
+ return fg;
+}
+
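+/* Create the hash-split flow table for a definer: one rule per
+ * (port, bucket) entry, each forwarding to the uplink of the port it
+ * is currently mapped to.
+ */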
+static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *lag_definer,
+ u8 *ports)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_namespace *ns;
+ int err, i;
+ int idx;
+ int j;
+
+ ft_attr.max_fte = ldev->ports * ldev->buckets;
+ ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL);
+ if (!ns) {
+ mlx5_core_warn(dev, "Failed to get port selection namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ lag_definer->ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(lag_definer->ft)) {
+ mlx5_core_warn(dev, "Failed to create port selection table\n");
+ return PTR_ERR(lag_definer->ft);
+ }
+
+ lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft,
+ lag_definer->definer,
+ ft_attr.max_fte);
+ if (IS_ERR(lag_definer->fg)) {
+ err = PTR_ERR(lag_definer->fg);
+ goto destroy_ft;
+ }
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ flow_act.flags |= FLOW_ACT_NO_APPEND;
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ u8 affinity;
+
+ idx = i * ldev->buckets + j;
+ affinity = ports[idx];
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
+ vhca_id);
+ lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
+ NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(lag_definer->rules[idx])) {
+ err = PTR_ERR(lag_definer->rules[idx]);
+ /* delete the rules created so far, for this port
+ * and all previous ones
+ */
+ do {
+ while (j--) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ j = ldev->buckets;
+ } while (i--);
+ goto destroy_fg;
+ }
+ }
+ }
+
+ return 0;
+
+destroy_fg:
+ mlx5_destroy_flow_group(lag_definer->fg);
+destroy_ft:
+ mlx5_destroy_flow_table(lag_definer->ft);
+ return err;
+}
+
+static int mlx5_lag_set_definer_inner(u32 *match_definer_mask,
+ enum mlx5_traffic_types tt)
+{
+ int format_id;
+ u8 *ipv6;
+
+ switch (tt) {
+ case MLX5_TT_IPV4_UDP:
+ case MLX5_TT_IPV4_TCP:
+ format_id = 23;
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l4_dport);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV4:
+ format_id = 23;
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l3_type);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV6_TCP:
+ case MLX5_TT_IPV6_UDP:
+ format_id = 31;
+ MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask,
+ inner_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask,
+ inner_l4_dport);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask,
+ inner_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask,
+ inner_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ break;
+ case MLX5_TT_IPV6:
+ format_id = 32;
+ ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask,
+ inner_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask,
+ inner_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask,
+ inner_smac_15_0);
+ break;
+ default:
+ format_id = 23;
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_l3_type);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask,
+ inner_smac_15_0);
+ break;
+ }
+
+ return format_id;
+}
+
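+/* Pick a match definer format and fill its mask for the given traffic
+ * type; tunneled traffic is hashed on the inner headers instead.
+ */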
+static int mlx5_lag_set_definer(u32 *match_definer_mask,
+ enum mlx5_traffic_types tt, bool tunnel,
+ enum netdev_lag_hash hash)
+{
+ int format_id;
+ u8 *ipv6;
+
+ if (tunnel)
+ return mlx5_lag_set_definer_inner(match_definer_mask, tt);
+
+ switch (tt) {
+ case MLX5_TT_IPV4_UDP:
+ case MLX5_TT_IPV4_TCP:
+ format_id = 22;
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_l4_dport);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV4:
+ format_id = 22;
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_l3_type);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_smac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_src_addr);
+ MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask,
+ outer_ip_dest_addr);
+ break;
+ case MLX5_TT_IPV6_TCP:
+ case MLX5_TT_IPV6_UDP:
+ format_id = 29;
+ MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask,
+ outer_l4_sport);
+ MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask,
+ outer_l4_dport);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask,
+ outer_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask,
+ outer_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ break;
+ case MLX5_TT_IPV6:
+ format_id = 30;
+ ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask,
+ outer_ip_dest_addr);
+ memset(ipv6, 0xff, 16);
+ ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask,
+ outer_ip_src_addr);
+ memset(ipv6, 0xff, 16);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_dmac_15_0);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask,
+ outer_smac_15_0);
+ break;
+ default:
+ format_id = 0;
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_smac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_smac_15_0);
+
+ if (hash == NETDEV_LAG_HASH_VLAN_SRCMAC) {
+ MLX5_SET_TO_ONES(match_definer_format_0,
+ match_definer_mask,
+ outer_first_vlan_vid);
+ break;
+ }
+
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_ethertype);
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_dmac_47_16);
+ MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask,
+ outer_dmac_15_0);
+ break;
+ }
+
+ return format_id;
+}
+
+static struct mlx5_lag_definer *
+mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
+ enum mlx5_traffic_types tt, bool tunnel, u8 *ports)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_definer *lag_definer;
+ u32 *match_definer_mask;
+ int format_id, err;
+
+ lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL);
+ if (!lag_definer)
+ return ERR_PTR(-ENOMEM);
+
+ match_definer_mask = kvzalloc(MLX5_FLD_SZ_BYTES(match_definer,
+ match_mask),
+ GFP_KERNEL);
+ if (!match_definer_mask) {
+ err = -ENOMEM;
+ goto free_lag_definer;
+ }
+
+ format_id = mlx5_lag_set_definer(match_definer_mask, tt, tunnel, hash);
+ lag_definer->definer =
+ mlx5_create_match_definer(dev, MLX5_FLOW_NAMESPACE_PORT_SEL,
+ format_id, match_definer_mask);
+ if (IS_ERR(lag_definer->definer)) {
+ err = PTR_ERR(lag_definer->definer);
+ goto free_mask;
+ }
+
+ err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports);
+ if (err)
+ goto destroy_match_definer;
+
+ kvfree(match_definer_mask);
+
+ return lag_definer;
+
+destroy_match_definer:
+ mlx5_destroy_match_definer(dev, lag_definer->definer);
+free_mask:
+ kvfree(match_definer_mask);
+free_lag_definer:
+ kfree(lag_definer);
+ return ERR_PTR(err);
+}
+
+static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *lag_definer)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ int idx;
+ int i;
+ int j;
+
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
+ }
+ }
+ mlx5_destroy_flow_group(lag_definer->fg);
+ mlx5_destroy_flow_table(lag_definer->ft);
+ mlx5_destroy_match_definer(dev, lag_definer->definer);
+ kfree(lag_definer);
+}
+
+static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int tt;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ if (port_sel->outer.definers[tt])
+ mlx5_lag_destroy_definer(ldev,
+ port_sel->outer.definers[tt]);
+ if (port_sel->inner.definers[tt])
+ mlx5_lag_destroy_definer(ldev,
+ port_sel->inner.definers[tt]);
+ }
+}
+
+static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type,
+ u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct mlx5_lag_definer *lag_definer;
+ int tt, err;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt,
+ false, ports);
+ if (IS_ERR(lag_definer)) {
+ err = PTR_ERR(lag_definer);
+ goto destroy_definers;
+ }
+ port_sel->outer.definers[tt] = lag_definer;
+
+ if (!port_sel->tunnel)
+ continue;
+
+ lag_definer =
+ mlx5_lag_create_definer(ldev, hash_type, tt,
+ true, ports);
+ if (IS_ERR(lag_definer)) {
+ err = PTR_ERR(lag_definer);
+ goto destroy_definers;
+ }
+ port_sel->inner.definers[tt] = lag_definer;
+ }
+
+ return 0;
+
+destroy_definers:
+ mlx5_lag_destroy_definers(ldev);
+ return err;
+}
+
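+/* Map the bond hash policy to the traffic types that get a hash-split
+ * table; the encapsulation-aware policies also enable inner hashing.
+ */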
+static void set_tt_map(struct mlx5_lag_port_sel *port_sel,
+ enum netdev_lag_hash hash)
+{
+ port_sel->tunnel = false;
+
+ switch (hash) {
+ case NETDEV_LAG_HASH_E34:
+ port_sel->tunnel = true;
+ fallthrough;
+ case NETDEV_LAG_HASH_L34:
+ set_bit(MLX5_TT_IPV4_TCP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV4_UDP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6_TCP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6_UDP, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV4, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6, port_sel->tt_map);
+ set_bit(MLX5_TT_ANY, port_sel->tt_map);
+ break;
+ case NETDEV_LAG_HASH_E23:
+ port_sel->tunnel = true;
+ fallthrough;
+ case NETDEV_LAG_HASH_L23:
+ set_bit(MLX5_TT_IPV4, port_sel->tt_map);
+ set_bit(MLX5_TT_IPV6, port_sel->tt_map);
+ set_bit(MLX5_TT_ANY, port_sel->tt_map);
+ break;
+ default:
+ set_bit(MLX5_TT_ANY, port_sel->tt_map);
+ break;
+ }
+}
+
+#define SET_IGNORE_DESTS_BITS(tt_map, dests) \
+ do { \
+ int idx; \
+ \
+ for_each_clear_bit(idx, tt_map, MLX5_NUM_TT) \
+ set_bit(idx, dests); \
+ } while (0)
+
+static void mlx5_lag_set_inner_ttc_params(struct mlx5_lag *ldev,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct mlx5_flow_table_attr *ft_attr;
+ int tt;
+
+ ttc_params->ns = mlx5_get_flow_namespace(dev,
+ MLX5_FLOW_NAMESPACE_PORT_SEL);
+ ft_attr = &ttc_params->ft_attr;
+ ft_attr->level = MLX5_LAG_FT_LEVEL_INNER_TTC;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ ttc_params->dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->dests[tt].ft = port_sel->inner.definers[tt]->ft;
+ }
+ SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests);
+}
+
+static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct mlx5_flow_table_attr *ft_attr;
+ int tt;
+
+ ttc_params->ns = mlx5_get_flow_namespace(dev,
+ MLX5_FLOW_NAMESPACE_PORT_SEL);
+ ft_attr = &ttc_params->ft_attr;
+ ft_attr->level = MLX5_LAG_FT_LEVEL_TTC;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ ttc_params->dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->dests[tt].ft = port_sel->outer.definers[tt]->ft;
+ }
+ SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests);
+
+ ttc_params->inner_ttc = port_sel->tunnel;
+ if (!port_sel->tunnel)
+ return;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ ttc_params->tunnel_dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->tunnel_dests[tt].ft =
+ mlx5_get_ttc_flow_table(port_sel->inner.ttc);
+ }
+}
+
+static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct ttc_params ttc_params = {};
+
+ mlx5_lag_set_outer_ttc_params(ldev, &ttc_params);
+ port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params);
+ if (IS_ERR(port_sel->outer.ttc))
+ return PTR_ERR(port_sel->outer.ttc);
+
+ return 0;
+}
+
+static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ struct ttc_params ttc_params = {};
+
+ mlx5_lag_set_inner_ttc_params(ldev, &ttc_params);
+ port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params);
+ if (IS_ERR(port_sel->inner.ttc))
+ return PTR_ERR(port_sel->inner.ttc);
+
+ return 0;
+}
+
+int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type, u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+
+ set_tt_map(port_sel, hash_type);
+ err = mlx5_lag_create_definers(ldev, hash_type, ports);
+ if (err)
+ return err;
+
+ if (port_sel->tunnel) {
+ err = mlx5_lag_create_inner_ttc_table(ldev);
+ if (err)
+ goto destroy_definers;
+ }
+
+ err = mlx5_lag_create_ttc_table(ldev);
+ if (err)
+ goto destroy_inner;
+
+ return 0;
+
+destroy_inner:
+ if (port_sel->tunnel)
+ mlx5_destroy_ttc_table(port_sel->inner.ttc);
+destroy_definers:
+ mlx5_lag_destroy_definers(ldev);
+ return err;
+}
+
+static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer *def,
+ u8 *ports)
+{
+ struct mlx5_flow_destination dest = {};
+ int idx;
+ int err;
+ int i;
+ int j;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+ if (ldev->v2p_map[idx] == ports[idx])
+ continue;
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
+ vhca_id);
+ err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+ struct mlx5_lag_definer **definers,
+ u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+ int tt;
+
+ for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+ err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+ int err;
+
+ err = mlx5_lag_modify_definers_destinations(ldev,
+ port_sel->outer.definers,
+ ports);
+ if (err)
+ return err;
+
+ if (!port_sel->tunnel)
+ return 0;
+
+ return mlx5_lag_modify_definers_destinations(ldev,
+ port_sel->inner.definers,
+ ports);
+}
+
+void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev)
+{
+ struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+
+ mlx5_destroy_ttc_table(port_sel->outer.ttc);
+ if (port_sel->tunnel)
+ mlx5_destroy_ttc_table(port_sel->inner.ttc);
+ mlx5_lag_destroy_definers(ldev);
+ memset(port_sel, 0, sizeof(*port_sel));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
new file mode 100644
index 000000000..5ec3af2a3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_LAG_FS_H__
+#define __MLX5_LAG_FS_H__
+
+#include "lib/fs_ttc.h"
+
+struct mlx5_lag_definer {
+ struct mlx5_flow_definer *definer;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ /* Each port has ldev->buckets number of rules and they are arranged in
+ * [port * buckets .. port * buckets + buckets) locations
+ */
+ struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
+};
+
+struct mlx5_lag_ttc {
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_lag_definer *definers[MLX5_NUM_TT];
+};
+
+struct mlx5_lag_port_sel {
+ DECLARE_BITMAP(tt_map, MLX5_NUM_TT);
+ bool tunnel;
+ struct mlx5_lag_ttc outer;
+ struct mlx5_lag_ttc inner;
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports);
+void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev);
+int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type, u8 *ports);
+
+#else /* CONFIG_MLX5_ESWITCH */
+static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+ enum netdev_lag_hash hash_type,
+ u8 *ports)
+{
+ return 0;
+}
+
+static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
+{
+ return 0;
+}
+
+static inline void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) {}
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_LAG_FS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
new file mode 100644
index 000000000..c215252f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/transobj.h>
+#include "clock.h"
+#include "aso.h"
+#include "wq.h"
+
+struct mlx5_aso_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per napi poll */
+ struct mlx5_core_cq mcq;
+
+ /* control */
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_ctrl wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5_aso {
+ /* data path */
+ u16 cc;
+ u16 pc;
+
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5_aso_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ void __iomem *uar_map;
+ u32 sqn;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+
+} ____cacheline_aligned_in_smp;
+
+static void mlx5_aso_free_cq(struct mlx5_aso_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_alloc_cq(struct mlx5_core_dev *mdev, int numa_node,
+ void *cqc_data, struct mlx5_aso_cq *cq)
+{
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ struct mlx5_wq_param param;
+ int err;
+ u32 i;
+
+ param.buf_numa_node = numa_node;
+ param.db_numa_node = numa_node;
+
+ err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
+ if (err)
+ return err;
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->mdev = mdev;
+
+ return 0;
+}
+
+static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data)
+{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_core_dev *mdev = cq->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ void *in, *cqc;
+ int inlen, eqn;
+ int err;
+
+ err = mlx5_vector2eqn(mdev, 0, &eqn);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
+
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5_aso_destroy_cq(struct mlx5_aso_cq *cq)
+{
+ mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node,
+ struct mlx5_aso_cq *cq)
+{
+ void *cqc_data;
+ int err;
+
+ cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
+ if (!cqc_data)
+ return -ENOMEM;
+
+ MLX5_SET(cqc, cqc_data, log_cq_size, 1);
+ MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD);
+
+ err = mlx5_aso_alloc_cq(mdev, numa_node, cqc_data, cq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc aso wq cq, err=%d\n", err);
+ goto err_out;
+ }
+
+ err = create_aso_cq(cq, cqc_data);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create aso wq cq, err=%d\n", err);
+ goto err_free_cq;
+ }
+
+ kvfree(cqc_data);
+ return 0;
+
+err_free_cq:
+ mlx5_aso_free_cq(cq);
+err_out:
+ kvfree(cqc_data);
+ return err;
+}
+
+static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wq_param param;
+ int err;
+
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+
+ param.db_numa_node = numa_node;
+ param.buf_numa_node = numa_node;
+ err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ return 0;
+}
+
+static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ void *in, *sqc, *wq;
+ int inlen, err;
+ u8 ts_format;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * sq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
+ MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
+
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ ts_format = mlx5_is_real_time_sq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_aso_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ void *in, *sqc;
+ int inlen, err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+
+ err = mlx5_core_modify_sq(mdev, sqn, in);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_aso_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
+ void *sqc_data, struct mlx5_aso *sq)
+{
+ int err;
+
+ err = create_aso_sq(mdev, pdn, sqc_data, sq);
+ if (err)
+ return err;
+
+ err = mlx5_aso_set_sq_rdy(mdev, sq->sqn);
+ if (err)
+ mlx5_core_destroy_sq(mdev, sq->sqn);
+
+ return err;
+}
+
+static void mlx5_aso_free_sq(struct mlx5_aso *sq)
+{
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5_aso_destroy_sq(struct mlx5_aso *sq)
+{
+ mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn);
+ mlx5_aso_free_sq(sq);
+}
+
+static int mlx5_aso_create_sq(struct mlx5_core_dev *mdev, int numa_node,
+ u32 pdn, struct mlx5_aso *sq)
+{
+ void *sqc_data, *wq;
+ int err;
+
+ sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
+ if (!sqc_data)
+ return -ENOMEM;
+
+ wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, pdn);
+ MLX5_SET(wq, wq, log_wq_sz, 1);
+
+ err = mlx5_aso_alloc_sq(mdev, numa_node, sqc_data, sq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc aso wq sq, err=%d\n", err);
+ goto err_out;
+ }
+
+ err = mlx5_aso_create_sq_rdy(mdev, pdn, sqc_data, sq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to open aso wq sq, err=%d\n", err);
+ goto err_free_asosq;
+ }
+
+ mlx5_core_dbg(mdev, "aso sq->sqn = 0x%x\n", sq->sqn);
+
+ kvfree(sqc_data);
+ return 0;
+
+err_free_asosq:
+ mlx5_aso_free_sq(sq);
+err_out:
+ kvfree(sqc_data);
+ return err;
+}
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn)
+{
+ int numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ struct mlx5_aso *aso;
+ int err;
+
+ aso = kzalloc(sizeof(*aso), GFP_KERNEL);
+ if (!aso)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_aso_create_cq(mdev, numa_node, &aso->cq);
+ if (err)
+ goto err_cq;
+
+ err = mlx5_aso_create_sq(mdev, numa_node, pdn, aso);
+ if (err)
+ goto err_sq;
+
+ return aso;
+
+err_sq:
+ mlx5_aso_destroy_cq(&aso->cq);
+err_cq:
+ kfree(aso);
+ return ERR_PTR(err);
+}
+
+void mlx5_aso_destroy(struct mlx5_aso *aso)
+{
+ if (IS_ERR_OR_NULL(aso))
+ return;
+
+ mlx5_aso_destroy_sq(aso);
+ mlx5_aso_destroy_cq(&aso->cq);
+ kfree(aso);
+}
+
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+ struct mlx5_aso_wqe *aso_wqe,
+ u32 obj_id, u32 opc_mode)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &aso_wqe->ctrl;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((opc_mode << MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT) |
+ (aso->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_ACCESS_ASO);
+ cseg->qpn_ds = cpu_to_be32((aso->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt);
+ cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ cseg->general_id = cpu_to_be32(obj_id);
+}
+
+void *mlx5_aso_get_wqe(struct mlx5_aso *aso)
+{
+ u16 pi;
+
+ pi = mlx5_wq_cyc_ctr2ix(&aso->wq, aso->pc);
+ return mlx5_wq_cyc_get_wqe(&aso->wq, pi);
+}
+
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg)
+{
+ doorbell_cseg->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ if (with_data)
+ aso->pc += MLX5_ASO_WQEBBS_DATA;
+ else
+ aso->pc += MLX5_ASO_WQEBBS;
+ *aso->wq.db = cpu_to_be32(aso->pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)doorbell_cseg, aso->uar_map);
+
+ /* Ensure doorbell is written on uar_page before poll_cq */
+ WRITE_ONCE(doorbell_cseg, NULL);
+}
+
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data)
+{
+ struct mlx5_aso_cq *cq = &aso->cq;
+ struct mlx5_cqe64 *cqe;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return -ETIMEDOUT;
+
+ /* aso->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ mlx5_cqwq_pop(&cq->wq);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ struct mlx5_err_cqe *err_cqe;
+
+ mlx5_core_err(cq->mdev, "Bad OP in ASOSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+
+ err_cqe = (struct mlx5_err_cqe *)cqe;
+ mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n",
+ err_cqe->vendor_err_synd);
+ mlx5_core_err(cq->mdev, "syndrome=%x\n",
+ err_cqe->syndrome);
+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
+ 16, 1, err_cqe,
+ sizeof(*err_cqe), false);
+ }
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ if (with_data)
+ aso->cc += MLX5_ASO_WQEBBS_DATA;
+ else
+ aso->cc += MLX5_ASO_WQEBBS;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
new file mode 100644
index 000000000..2d40dcf9d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LIB_ASO_H__
+#define __MLX5_LIB_ASO_H__
+
+#include <linux/mlx5/qp.h>
+#include "mlx5_core.h"
+
+#define MLX5_ASO_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_BB))
+#define MLX5_ASO_WQEBBS_DATA \
+ (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_BB))
+#define ASO_CTRL_READ_EN BIT(0)
+#define MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT 24
+#define MLX5_MACSEC_ASO_DS_CNT (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_DS))
+
+struct mlx5_wqe_aso_ctrl_seg {
+ __be32 va_h;
+ __be32 va_l; /* includes read_enable */
+ __be32 l_key;
+ u8 data_mask_mode;
+ u8 condition_1_0_operand;
+ u8 condition_1_0_offset;
+ u8 data_offset_condition_operand;
+ __be32 condition_0_data;
+ __be32 condition_0_mask;
+ __be32 condition_1_data;
+ __be32 condition_1_mask;
+ __be64 bitwise_data;
+ __be64 data_mask;
+};
+
+struct mlx5_wqe_aso_data_seg {
+ __be32 bytewise_data[16];
+};
+
+struct mlx5_aso_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_aso_ctrl_seg aso_ctrl;
+};
+
+struct mlx5_aso_wqe_data {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_aso_ctrl_seg aso_ctrl;
+ struct mlx5_wqe_aso_data_seg aso_data;
+};
+
+enum {
+ MLX5_ASO_LOGICAL_AND,
+ MLX5_ASO_LOGICAL_OR,
+};
+
+enum {
+ MLX5_ASO_ALWAYS_FALSE,
+ MLX5_ASO_ALWAYS_TRUE,
+ MLX5_ASO_EQUAL,
+ MLX5_ASO_NOT_EQUAL,
+ MLX5_ASO_GREATER_OR_EQUAL,
+ MLX5_ASO_LESSER_OR_EQUAL,
+ MLX5_ASO_LESSER,
+ MLX5_ASO_GREATER,
+ MLX5_ASO_CYCLIC_GREATER,
+ MLX5_ASO_CYCLIC_LESSER,
+};
+
+enum {
+ MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT,
+ MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE,
+ MLX5_ASO_DATA_MASK_MODE_CALCULATED_64BYTE,
+};
+
+enum {
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER = 0x2,
+ MLX5_ACCESS_ASO_OPC_MOD_MACSEC = 0x5,
+};
+
+struct mlx5_aso;
+
+void *mlx5_aso_get_wqe(struct mlx5_aso *aso);
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+ struct mlx5_aso_wqe *aso_wqe,
+ u32 obj_id, u32 opc_mode);
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg);
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data);
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn);
+void mlx5_aso_destroy(struct mlx5_aso *aso);
+#endif /* __MLX5_LIB_ASO_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
new file mode 100644
index 000000000..2ac255bb9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -0,0 +1,1026 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/highmem.h>
+#include <linux/ptp_clock_kernel.h>
+#include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
+#include "en.h"
+#include "clock.h"
+
+enum {
+ MLX5_CYCLES_SHIFT = 23
+};
+
+enum {
+ MLX5_PIN_MODE_IN = 0x0,
+ MLX5_PIN_MODE_OUT = 0x1,
+};
+
+enum {
+ MLX5_OUT_PATTERN_PULSE = 0x0,
+ MLX5_OUT_PATTERN_PERIODIC = 0x1,
+};
+
+enum {
+ MLX5_EVENT_MODE_DISABLE = 0x0,
+ MLX5_EVENT_MODE_REPETETIVE = 0x1,
+ MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2,
+};
+
+enum {
+ MLX5_MTPPS_FS_ENABLE = BIT(0x0),
+ MLX5_MTPPS_FS_PATTERN = BIT(0x2),
+ MLX5_MTPPS_FS_PIN_MODE = BIT(0x3),
+ MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4),
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
+ MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
+ MLX5_MTPPS_FS_NPPS_PERIOD = BIT(0x9),
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS = BIT(0xa),
+};
+
+static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev));
+}
+
+static bool mlx5_npps_real_time_supported(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_real_time_mode(mdev) &&
+ MLX5_CAP_MCAM_FEATURE(mdev, npps_period) &&
+ MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns));
+}
+
+static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
+}
+
+static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size)
+{
+ u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!MLX5_CAP_MCAM_REG(dev, mtutc))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(dev, mtutc, size, out, sizeof(out),
+ MLX5_REG_MTUTC, 0, 1);
+}
+
+static u64 mlx5_read_time(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts,
+ bool real_time)
+{
+ u32 timer_h, timer_h1, timer_l;
+
+ timer_h = ioread32be(real_time ? &dev->iseg->real_time_h :
+ &dev->iseg->internal_timer_h);
+ ptp_read_system_prets(sts);
+ timer_l = ioread32be(real_time ? &dev->iseg->real_time_l :
+ &dev->iseg->internal_timer_l);
+ ptp_read_system_postts(sts);
+ timer_h1 = ioread32be(real_time ? &dev->iseg->real_time_h :
+ &dev->iseg->internal_timer_h);
+ if (timer_h != timer_h1) {
+ /* wrap around */
+ ptp_read_system_prets(sts);
+ timer_l = ioread32be(real_time ? &dev->iseg->real_time_l :
+ &dev->iseg->internal_timer_l);
+ ptp_read_system_postts(sts);
+ }
+
+ return real_time ? REAL_TIME_TO_NS(timer_h1, timer_l) :
+ (u64)timer_l | (u64)timer_h1 << 32;
+}
+
+static u64 read_internal_timer(const struct cyclecounter *cc)
+{
+ struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles);
+ struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+
+ return mlx5_read_time(mdev, NULL, false) & cc->mask;
+}
+
+static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_timer *timer;
+ u32 sign;
+
+ if (!clock_info)
+ return;
+
+ sign = smp_load_acquire(&clock_info->sign);
+ smp_store_mb(clock_info->sign,
+ sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
+
+ timer = &clock->timer;
+ clock_info->cycles = timer->tc.cycle_last;
+ clock_info->mult = timer->cycles.mult;
+ clock_info->nsec = timer->tc.nsec;
+ clock_info->frac = timer->tc.frac;
+
+ smp_store_release(&clock_info->sign,
+ sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
+}
+
+static void mlx5_pps_out(struct work_struct *work)
+{
+ struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
+ out_work);
+ struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
+ pps_info);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ u64 tstart;
+
+ write_seqlock_irqsave(&clock->lock, flags);
+ tstart = clock->pps_info.start[i];
+ clock->pps_info.start[i] = 0;
+ write_sequnlock_irqrestore(&clock->lock, flags);
+ if (!tstart)
+ continue;
+
+ MLX5_SET(mtpps_reg, in, pin, i);
+ MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
+ MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP);
+ mlx5_set_mtpps(mdev, in, sizeof(in));
+ }
+}
+
+static void mlx5_timestamp_overflow(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5_core_dev *mdev;
+ struct mlx5_timer *timer;
+ struct mlx5_clock *clock;
+ unsigned long flags;
+
+ timer = container_of(dwork, struct mlx5_timer, overflow_work);
+ clock = container_of(timer, struct mlx5_clock, timer);
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
+ write_seqlock_irqsave(&clock->lock, flags);
+ timecounter_read(&timer->tc);
+ mlx5_update_clock_info_page(mdev);
+ write_sequnlock_irqrestore(&clock->lock, flags);
+
+out:
+ schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
+}
+
+static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
+ const struct timespec64 *ts)
+{
+ u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!mlx5_modify_mtutc_allowed(mdev))
+ return 0;
+
+ if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX ||
+ ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC)
+ return -EINVAL;
+
+ MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE);
+ MLX5_SET(mtutc_reg, in, utc_sec, ts->tv_sec);
+ MLX5_SET(mtutc_reg, in, utc_nsec, ts->tv_nsec);
+
+ return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
+ unsigned long flags;
+ int err;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+ err = mlx5_ptp_settime_real_time(mdev, ts);
+ if (err)
+ return err;
+
+ write_seqlock_irqsave(&clock->lock, flags);
+ timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts));
+ mlx5_update_clock_info_page(mdev);
+ write_sequnlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static
+struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev,
+ struct ptp_system_timestamp *sts)
+{
+ struct timespec64 ts;
+ u64 time;
+
+ time = mlx5_read_time(mdev, sts, true);
+ ts = ns_to_timespec64(time);
+ return ts;
+}
+
+static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
+ unsigned long flags;
+ u64 cycles, ns;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+ if (mlx5_real_time_mode(mdev)) {
+ *ts = mlx5_ptp_gettimex_real_time(mdev, sts);
+ goto out;
+ }
+
+ write_seqlock_irqsave(&clock->lock, flags);
+ cycles = mlx5_read_time(mdev, sts, false);
+ ns = timecounter_cyc2time(&timer->tc, cycles);
+ write_sequnlock_irqrestore(&clock->lock, flags);
+ *ts = ns_to_timespec64(ns);
+out:
+ return 0;
+}
+
+static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta)
+{
+ u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!mlx5_modify_mtutc_allowed(mdev))
+ return 0;
+
+ /* HW time adjustment range is s16. If out of range, settime instead */
+ if (delta < S16_MIN || delta > S16_MAX) {
+ struct timespec64 ts;
+ s64 ns;
+
+ ts = mlx5_ptp_gettimex_real_time(mdev, NULL);
+ ns = timespec64_to_ns(&ts) + delta;
+ ts = ns_to_timespec64(ns);
+ return mlx5_ptp_settime_real_time(mdev, &ts);
+ }
+
+ MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_TIME);
+ MLX5_SET(mtutc_reg, in, time_adjustment, delta);
+
+ return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
+static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
+ unsigned long flags;
+ int err;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+ err = mlx5_ptp_adjtime_real_time(mdev, delta);
+ if (err)
+ return err;
+ write_seqlock_irqsave(&clock->lock, flags);
+ timecounter_adjtime(&timer->tc, delta);
+ mlx5_update_clock_info_page(mdev);
+ write_sequnlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_ptp_adjfreq_real_time(struct mlx5_core_dev *mdev, s32 freq)
+{
+ u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+ if (!mlx5_modify_mtutc_allowed(mdev))
+ return 0;
+
+ MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC);
+ MLX5_SET(mtutc_reg, in, freq_adjustment, freq);
+
+ return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
+static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_timer *timer = &clock->timer;
+ struct mlx5_core_dev *mdev;
+ unsigned long flags;
+ int neg_adj = 0;
+ u32 diff;
+ u64 adj;
+ int err;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+ err = mlx5_ptp_adjfreq_real_time(mdev, delta);
+ if (err)
+ return err;
+
+ if (delta < 0) {
+ neg_adj = 1;
+ delta = -delta;
+ }
+
+ adj = timer->nominal_c_mult;
+ adj *= delta;
+ diff = div_u64(adj, 1000000000ULL);
+
+ write_seqlock_irqsave(&clock->lock, flags);
+ timecounter_read(&timer->tc);
+ timer->cycles.mult = neg_adj ? timer->nominal_c_mult - diff :
+ timer->nominal_c_mult + diff;
+ mlx5_update_clock_info_page(mdev);
+ write_sequnlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_extts_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u32 field_select = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests to enable time stamping on both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
+ if (rq->extts.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+ if (pin < 0)
+ return -EBUSY;
+
+ if (on) {
+ pin_mode = MLX5_PIN_MODE_IN;
+ pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
+ field_select = MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_ENABLE;
+ } else {
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u64 cycles_now, cycles_delta;
+ u64 nsec_now, nsec_delta;
+ struct mlx5_timer *timer;
+ unsigned long flags;
+
+ timer = &clock->timer;
+
+ cycles_now = mlx5_read_time(mdev, NULL, false);
+ write_seqlock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&timer->tc, cycles_now);
+ nsec_delta = target_ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << timer->cycles.shift,
+ timer->cycles.mult);
+ write_sequnlock_irqrestore(&clock->lock, flags);
+
+ return cycles_now + cycles_delta;
+}
+
+static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec)
+{
+ struct timespec64 ts = {};
+ s64 target_ns;
+
+ ts.tv_sec = sec;
+ target_ns = timespec64_to_ns(&ts);
+
+ return find_target_cycles(mdev, target_ns);
+}
+
+static u64 perout_conf_real_time(s64 sec, u32 nsec)
+{
+ return (u64)nsec | (u64)sec << 32;
+}
+
+static int perout_conf_1pps(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+ u64 *time_stamp, bool real_time)
+{
+ struct timespec64 ts;
+ s64 ns;
+
+ ts.tv_nsec = rq->perout.period.nsec;
+ ts.tv_sec = rq->perout.period.sec;
+ ns = timespec64_to_ns(&ts);
+
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+
+ *time_stamp = real_time ? perout_conf_real_time(rq->perout.start.sec, 0) :
+ perout_conf_internal_timer(mdev, rq->perout.start.sec);
+
+ return 0;
+}
+
+#define MLX5_MAX_PULSE_DURATION (BIT(__mlx5_bit_sz(mtpps_reg, out_pulse_duration_ns)) - 1)
+static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev,
+ struct ptp_clock_request *rq,
+ u32 *out_pulse_duration_ns)
+{
+ struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ u32 out_pulse_duration;
+ struct timespec64 ts;
+
+ if (rq->perout.flags & PTP_PEROUT_DUTY_CYCLE) {
+ ts.tv_sec = rq->perout.on.sec;
+ ts.tv_nsec = rq->perout.on.nsec;
+ out_pulse_duration = (u32)timespec64_to_ns(&ts);
+ } else {
+ /* by default, out_pulse_duration_ns is 50% of the
+ * pulse period
+ */
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ out_pulse_duration = (u32)timespec64_to_ns(&ts) >> 1;
+ }
+
+ if (out_pulse_duration < pps_info->min_out_pulse_duration_ns ||
+ out_pulse_duration > MLX5_MAX_PULSE_DURATION) {
+ mlx5_core_err(mdev, "NPPS pulse duration %u is not in [%llu, %lu]\n",
+ out_pulse_duration, pps_info->min_out_pulse_duration_ns,
+ MLX5_MAX_PULSE_DURATION);
+ return -EINVAL;
+ }
+ *out_pulse_duration_ns = out_pulse_duration;
+
+ return 0;
+}
+
+static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+ u32 *field_select, u32 *out_pulse_duration_ns,
+ u64 *period, u64 *time_stamp)
+{
+ struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ struct ptp_clock_time *time = &rq->perout.start;
+ struct timespec64 ts;
+
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ if (timespec64_to_ns(&ts) < pps_info->min_npps_period) {
+ mlx5_core_err(mdev, "NPPS period is lower than minimal npps period %llu\n",
+ pps_info->min_npps_period);
+ return -EINVAL;
+ }
+ *period = perout_conf_real_time(rq->perout.period.sec, rq->perout.period.nsec);
+
+ if (mlx5_perout_conf_out_pulse_duration(mdev, rq, out_pulse_duration_ns))
+ return -EINVAL;
+
+ *time_stamp = perout_conf_real_time(time->sec, time->nsec);
+ *field_select |= MLX5_MTPPS_FS_NPPS_PERIOD |
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS;
+
+ return 0;
+}
+
+static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags)
+{
+ return ((!mlx5_npps_real_time_supported(mdev) && flags) ||
+ (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE));
+}
+
+static int mlx5_perout_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ bool rt_mode = mlx5_real_time_mode(mdev);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u32 out_pulse_duration_ns = 0;
+ u32 field_select = 0;
+ u64 npps_period = 0;
+ u64 time_stamp = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ /* Reject requests with unsupported flags */
+ if (mlx5_perout_verify_flags(mdev, rq->perout.flags))
+ return -EOPNOTSUPP;
+
+ if (rq->perout.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index);
+ if (pin < 0)
+ return -EBUSY;
+
+ if (on) {
+ bool rt_mode = mlx5_real_time_mode(mdev);
+
+ pin_mode = MLX5_PIN_MODE_OUT;
+ pattern = MLX5_OUT_PATTERN_PERIODIC;
+
+ if (rt_mode && rq->perout.start.sec > U32_MAX)
+ return -EINVAL;
+
+ field_select |= MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_TIME_STAMP;
+
+ if (mlx5_npps_real_time_supported(mdev))
+ err = perout_conf_npps_real_time(mdev, rq, &field_select,
+ &out_pulse_duration_ns, &npps_period,
+ &time_stamp);
+ else
+ err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode);
+ if (err)
+ return err;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+ MLX5_SET64(mtpps_reg, in, npps_period, npps_period);
+ MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns);
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ if (rt_mode)
+ return 0;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_pps_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+
+ clock->pps_info.enabled = !!on;
+ return 0;
+}
+
+static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ return mlx5_extts_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PEROUT:
+ return mlx5_perout_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PPS:
+ return mlx5_pps_configure(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+enum {
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0),
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1),
+};
+
+static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+
+ switch (func) {
+ case PTP_PF_NONE:
+ return 0;
+ case PTP_PF_EXTTS:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN);
+ case PTP_PF_PEROUT:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct ptp_clock_info mlx5_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "mlx5_ptp",
+ .max_adj = 50000000,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 0,
+ .adjfreq = mlx5_ptp_adjfreq,
+ .adjtime = mlx5_ptp_adjtime,
+ .gettimex64 = mlx5_ptp_gettimex,
+ .settime64 = mlx5_ptp_settime,
+ .enable = NULL,
+ .verify = NULL,
+};
+
+static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
+ u32 *mtpps, u32 mtpps_size)
+{
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps,
+ mtpps_size, MLX5_REG_MTPPS, 0, 0);
+}
+
+static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
+{
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+ u8 mode;
+ int err;
+
+ err = mlx5_query_mtpps_pin_mode(mdev, pin, out, sizeof(out));
+ if (err || !MLX5_GET(mtpps_reg, out, enable))
+ return PTP_PF_NONE;
+
+ mode = MLX5_GET(mtpps_reg, out, pin_mode);
+
+ if (mode == MLX5_PIN_MODE_IN)
+ return PTP_PF_EXTTS;
+ else if (mode == MLX5_PIN_MODE_OUT)
+ return PTP_PF_PEROUT;
+
+ return PTP_PF_NONE;
+}
+
+static void mlx5_init_pin_config(struct mlx5_clock *clock)
+{
+ int i;
+
+ if (!clock->ptp_info.n_pins)
+ return;
+
+ clock->ptp_info.pin_config =
+ kcalloc(clock->ptp_info.n_pins,
+ sizeof(*clock->ptp_info.pin_config),
+ GFP_KERNEL);
+ if (!clock->ptp_info.pin_config)
+ return;
+ clock->ptp_info.enable = mlx5_ptp_enable;
+ clock->ptp_info.verify = mlx5_ptp_verify;
+ clock->ptp_info.pps = 1;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ snprintf(clock->ptp_info.pin_config[i].name,
+ sizeof(clock->ptp_info.pin_config[i].name),
+ "mlx5_pps%d", i);
+ clock->ptp_info.pin_config[i].index = i;
+ clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
+ clock->ptp_info.pin_config[i].chan = 0;
+ }
+}
+
+static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ mlx5_query_mtpps(mdev, out, sizeof(out));
+
+ clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
+ cap_number_of_pps_pins);
+ clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_in_pins);
+ clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_out_pins);
+
+ if (MLX5_CAP_MCAM_FEATURE(mdev, npps_period))
+ clock->pps_info.min_npps_period = 1 << MLX5_GET(mtpps_reg, out,
+ cap_log_min_npps_period);
+ if (MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns))
+ clock->pps_info.min_out_pulse_duration_ns = 1 << MLX5_GET(mtpps_reg, out,
+ cap_log_min_out_pulse_duration_ns);
+
+ clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+ clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+ clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+ clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+ clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+ clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+ clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+ clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+}
+
+static void ts_next_sec(struct timespec64 *ts)
+{
+ ts->tv_sec += 1;
+ ts->tv_nsec = 0;
+}
+
+static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev,
+ struct mlx5_clock *clock)
+{
+ struct timespec64 ts;
+ s64 target_ns;
+
+ mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
+ ts_next_sec(&ts);
+ target_ns = timespec64_to_ns(&ts);
+
+ return find_target_cycles(mdev, target_ns);
+}
+
+static int mlx5_pps_event(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+ struct ptp_clock_event ptp_event;
+ struct mlx5_eqe *eqe = data;
+ int pin = eqe->data.pps.pin;
+ struct mlx5_core_dev *mdev;
+ unsigned long flags;
+ u64 ns;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+ switch (clock->ptp_info.pin_config[pin].func) {
+ case PTP_PF_EXTTS:
+ ptp_event.index = pin;
+ ptp_event.timestamp = mlx5_real_time_mode(mdev) ?
+ mlx5_real_time_cyc2time(clock,
+ be64_to_cpu(eqe->data.pps.time_stamp)) :
+ mlx5_timecounter_cyc2time(clock,
+ be64_to_cpu(eqe->data.pps.time_stamp));
+ if (clock->pps_info.enabled) {
+ ptp_event.type = PTP_CLOCK_PPSUSR;
+ ptp_event.pps_times.ts_real =
+ ns_to_timespec64(ptp_event.timestamp);
+ } else {
+ ptp_event.type = PTP_CLOCK_EXTTS;
+ }
+ /* TODO: clock->ptp can be NULL if ptp_clock_register fails */
+ ptp_clock_event(clock->ptp, &ptp_event);
+ break;
+ case PTP_PF_PEROUT:
+ ns = perout_conf_next_event_timer(mdev, clock);
+ write_seqlock_irqsave(&clock->lock, flags);
+ clock->pps_info.start[pin] = ns;
+ write_sequnlock_irqrestore(&clock->lock, flags);
+ schedule_work(&clock->pps_info.out_work);
+ break;
+ default:
+ mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+ clock->ptp_info.pin_config[pin].func);
+ }
+
+ return NOTIFY_OK;
+}
+
+static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_timer *timer = &clock->timer;
+ u32 dev_freq;
+
+ dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ timer->cycles.read = read_internal_timer;
+ timer->cycles.shift = MLX5_CYCLES_SHIFT;
+ timer->cycles.mult = clocksource_khz2mult(dev_freq,
+ timer->cycles.shift);
+ timer->nominal_c_mult = timer->cycles.mult;
+ timer->cycles.mask = CLOCKSOURCE_MASK(41);
+
+ timecounter_init(&timer->tc, &timer->cycles,
+ ktime_to_ns(ktime_get_real()));
+}
+
+static void mlx5_init_overflow_period(struct mlx5_clock *clock)
+{
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
+ struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_timer *timer = &clock->timer;
+ u64 overflow_cycles;
+ u64 frac = 0;
+ u64 ns;
+
+ /* Calculate the period for the overflow watchdog so that the counter
+ * is checked at least twice per wrap-around.
+ * The period is the minimum of the maximum HW cycle count (the clock
+ * source mask) and the maximum number of cycles that can be multiplied
+ * by the clock multiplier without exceeding 64 bits.
+ */
+ overflow_cycles = div64_u64(~0ULL >> 1, timer->cycles.mult);
+ overflow_cycles = min(overflow_cycles, div_u64(timer->cycles.mask, 3));
+
+ ns = cyclecounter_cyc2ns(&timer->cycles, overflow_cycles,
+ frac, &frac);
+ do_div(ns, NSEC_PER_SEC / HZ);
+ timer->overflow_period = ns;
+
+ INIT_DELAYED_WORK(&timer->overflow_work, mlx5_timestamp_overflow);
+ if (timer->overflow_period)
+ schedule_delayed_work(&timer->overflow_work, 0);
+ else
+ mlx5_core_warn(mdev,
+ "invalid overflow period, overflow_work is not scheduled\n");
+
+ if (clock_info)
+ clock_info->overflow_period = timer->overflow_period;
+}
+
+static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_ib_clock_info *info;
+ struct mlx5_timer *timer;
+
+ mdev->clock_info = (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL);
+ if (!mdev->clock_info) {
+ mlx5_core_warn(mdev, "Failed to allocate IB clock info page\n");
+ return;
+ }
+
+ info = mdev->clock_info;
+ timer = &clock->timer;
+
+ info->nsec = timer->tc.nsec;
+ info->cycles = timer->tc.cycle_last;
+ info->mask = timer->cycles.mask;
+ info->mult = timer->nominal_c_mult;
+ info->shift = timer->cycles.shift;
+ info->frac = timer->tc.frac;
+}
+
+static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ mlx5_timecounter_init(mdev);
+ mlx5_init_clock_info(mdev);
+ mlx5_init_overflow_period(clock);
+ clock->ptp_info = mlx5_ptp_clock_info;
+
+ if (mlx5_real_time_mode(mdev)) {
+ struct timespec64 ts;
+
+ ktime_get_real_ts64(&ts);
+ mlx5_ptp_settime(&clock->ptp_info, &ts);
+ }
+}
+
+static void mlx5_init_pps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return;
+
+ mlx5_get_pps_caps(mdev);
+ mlx5_init_pin_config(clock);
+}
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
+ mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+ return;
+ }
+
+ seqlock_init(&clock->lock);
+ mlx5_init_timer_clock(mdev);
+ INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
+
+ /* Configure the PHC */
+ clock->ptp_info = mlx5_ptp_clock_info;
+
+ /* Initialize 1PPS data structures */
+ mlx5_init_pps(mdev);
+
+ clock->ptp = ptp_clock_register(&clock->ptp_info,
+ &mdev->pdev->dev);
+ if (IS_ERR(clock->ptp)) {
+ mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+ PTR_ERR(clock->ptp));
+ clock->ptp = NULL;
+ }
+
+ MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+ mlx5_eq_notifier_register(mdev, &clock->pps_nb);
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
+ if (clock->ptp) {
+ ptp_clock_unregister(clock->ptp);
+ clock->ptp = NULL;
+ }
+
+ cancel_work_sync(&clock->pps_info.out_work);
+ cancel_delayed_work_sync(&clock->timer.overflow_work);
+
+ if (mdev->clock_info) {
+ free_page((unsigned long)mdev->clock_info);
+ mdev->clock_info = NULL;
+ }
+
+ kfree(clock->ptp_info.pin_config);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
new file mode 100644
index 000000000..bd95b9f8d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_CLOCK_H__
+#define __LIB_CLOCK_H__
+
+static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev)
+{
+ u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format);
+
+ return (rq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+ rq_ts_format_cap ==
+ MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME);
+}
+
+static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev)
+{
+ u8 sq_ts_format_cap = MLX5_CAP_GEN(mdev, sq_ts_format);
+
+ return (sq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+ sq_ts_format_cap ==
+ MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME);
+}
+
+typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64);
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+void mlx5_init_clock(struct mlx5_core_dev *mdev);
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+ return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ struct mlx5_timer *timer = &clock->timer;
+ unsigned int seq;
+ u64 nsec;
+
+ do {
+ seq = read_seqbegin(&clock->lock);
+ nsec = timecounter_cyc2time(&timer->tc, timestamp);
+ } while (read_seqretry(&clock->lock, seq));
+
+ return ns_to_ktime(nsec);
+}
+
+#define REAL_TIME_TO_NS(hi, low) (((u64)hi) * NSEC_PER_SEC + ((u64)low))
+
+static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ u64 time = REAL_TIME_TO_NS(timestamp >> 32, timestamp & 0xFFFFFFFF);
+
+ return ns_to_ktime(time);
+}
+#else
+static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+ return -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ return 0;
+}
+
+static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ return 0;
+}
+#endif
+
+static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev)
+{
+ return mlx5_is_real_time_rq(mdev) ? mlx5_real_time_cyc2time :
+ mlx5_timecounter_cyc2time;
+}
+
+static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev)
+{
+ return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time :
+ mlx5_timecounter_cyc2time;
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
new file mode 100644
index 000000000..e995f8378
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+ void *key, u32 sz_bytes,
+ u32 key_type, u32 *p_key_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 sz_bits = sz_bytes * BITS_PER_BYTE;
+ u8 general_obj_key_size;
+ u64 general_obj_types;
+ void *obj, *key_p;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object);
+ key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key);
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY))
+ return -EINVAL;
+
+ switch (sz_bits) {
+ case 128:
+ general_obj_key_size =
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128;
+ key_p += sz_bytes;
+ break;
+ case 256:
+ general_obj_key_size =
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(key_p, key, sz_bytes);
+
+ MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size);
+ MLX5_SET(encryption_key_obj, obj, key_type, key_type);
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+ MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ /* avoid leaking key on the stack */
+ memzero_explicit(in, sizeof(in));
+
+ return err;
+}
+
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
new file mode 100644
index 000000000..b7d779d08
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
+#include "mlx5_core.h"
+
+static LIST_HEAD(devcom_list);
+
+#define devcom_for_each_component(priv, comp, iter) \
+ for (iter = 0; \
+ comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \
+ iter++)
+
+struct mlx5_devcom_component {
+ struct {
+ void __rcu *data;
+ } device[MLX5_DEVCOM_PORTS_SUPPORTED];
+
+ mlx5_devcom_event_handler_t handler;
+ struct rw_semaphore sem;
+ bool paired;
+};
+
+struct mlx5_devcom_list {
+ struct list_head list;
+
+ struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
+ struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED];
+};
+
+struct mlx5_devcom {
+ struct mlx5_devcom_list *priv;
+ int idx;
+};
+
+static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void)
+{
+ struct mlx5_devcom_component *comp;
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ devcom_for_each_component(priv, comp, i)
+ init_rwsem(&comp->sem);
+
+ return priv;
+}
+
+static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv,
+ u8 idx)
+{
+ struct mlx5_devcom *devcom;
+
+ devcom = kzalloc(sizeof(*devcom), GFP_KERNEL);
+ if (!devcom)
+ return NULL;
+
+ devcom->priv = priv;
+ devcom->idx = idx;
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_devcom_list *priv = NULL, *iter;
+ struct mlx5_devcom *devcom = NULL;
+ bool new_priv = false;
+ u64 sguid0, sguid1;
+ int idx, i;
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+ if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
+ return NULL;
+
+ mlx5_dev_list_lock();
+ sguid0 = mlx5_query_nic_system_image_guid(dev);
+ list_for_each_entry(iter, &devcom_list, list) {
+ struct mlx5_core_dev *tmp_dev = NULL;
+
+ idx = -1;
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
+ if (iter->devs[i])
+ tmp_dev = iter->devs[i];
+ else
+ idx = i;
+ }
+
+ if (idx == -1)
+ continue;
+
+ sguid1 = mlx5_query_nic_system_image_guid(tmp_dev);
+ if (sguid0 != sguid1)
+ continue;
+
+ priv = iter;
+ break;
+ }
+
+ if (!priv) {
+ priv = mlx5_devcom_list_alloc();
+ if (!priv) {
+ devcom = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ idx = 0;
+ new_priv = true;
+ }
+
+ priv->devs[idx] = dev;
+ devcom = mlx5_devcom_alloc(priv, idx);
+ if (!devcom) {
+ if (new_priv)
+ kfree(priv);
+ devcom = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ if (new_priv)
+ list_add(&priv->list, &devcom_list);
+out:
+ mlx5_dev_list_unlock();
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
+{
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ mlx5_dev_list_lock();
+ priv = devcom->priv;
+ priv->devs[devcom->idx] = NULL;
+
+ kfree(devcom);
+
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+ if (priv->devs[i])
+ break;
+
+ if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
+ goto out;
+
+ list_del(&priv->list);
+ kfree(priv);
+out:
+ mlx5_dev_list_unlock();
+}
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ WARN_ON(!data);
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->handler = handler;
+ rcu_assign_pointer(comp->device[devcom->idx].data, data);
+ up_write(&comp->sem);
+}
+
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL);
+ up_write(&comp->sem);
+ synchronize_rcu();
+}
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data)
+{
+ struct mlx5_devcom_component *comp;
+ int err = -ENODEV, i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return err;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
+ void *data = rcu_dereference_protected(comp->device[i].data,
+ lockdep_is_held(&comp->sem));
+
+ if (i != devcom->idx && data) {
+ err = comp->handler(event, data, event_data);
+ break;
+ }
+ }
+
+ up_write(&comp->sem);
+ return err;
+}
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired)
+{
+ struct mlx5_devcom_component *comp;
+
+ comp = &devcom->priv->components[id];
+ WARN_ON(!rwsem_is_locked(&comp->sem));
+
+ WRITE_ONCE(comp->paired, paired);
+}
+
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ if (IS_ERR_OR_NULL(devcom))
+ return false;
+
+ return READ_ONCE(devcom->priv->components[id].paired);
+}
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return NULL;
+
+ comp = &devcom->priv->components[id];
+ down_read(&comp->sem);
+ if (!READ_ONCE(comp->paired)) {
+ up_read(&comp->sem);
+ return NULL;
+ }
+
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+ if (i != devcom->idx)
+ break;
+
+ return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
+}
+
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return NULL;
+
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+ if (i != devcom->idx)
+ break;
+
+ comp = &devcom->priv->components[id];
+ /* This can change concurrently, however the 'data' pointer will remain
+ * valid for the duration of the RCU read-side section.
+ */
+ if (!READ_ONCE(comp->paired))
+ return NULL;
+
+ return rcu_dereference(comp->device[i].data);
+}
+
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp = &devcom->priv->components[id];
+
+ up_read(&comp->sem);
+}
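
A usage sketch for the peer-data accessors above; the function name is an illustrative assumption. A non-NULL return from mlx5_devcom_get_peer_data() means the component semaphore is held for read and must be released; the _rcu variant skips the semaphore and therefore assumes the caller is already inside an RCU read-side critical section.

/* Hypothetical caller: fetch the paired peer's private data for the
 * ESW_OFFLOADS component and release it when done.
 */
static void example_use_peer_data(struct mlx5_devcom *devcom)
{
	void *peer;

	peer = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer)
		return;	/* not paired, or devcom not available */

	/* ... peer is safe to dereference here ... */

	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}
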
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
new file mode 100644
index 000000000..9a496f472
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_DEVCOM_H__
+#define __LIB_MLX5_DEVCOM_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_DEVCOM_PORTS_SUPPORTED 2
+
+enum mlx5_devcom_components {
+ MLX5_DEVCOM_ESW_OFFLOADS,
+
+ MLX5_DEVCOM_NUM_COMPONENTS,
+};
+
+typedef int (*mlx5_devcom_event_handler_t)(int event,
+ void *my_data,
+ void *event_data);
+
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev);
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom);
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data);
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data);
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired);
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+#endif
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
new file mode 100644
index 000000000..9482e51ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+struct mlx5_dm {
+ /* protect access to icm bitmask */
+ spinlock_t lock;
+ unsigned long *steering_sw_icm_alloc_blocks;
+ unsigned long *header_modify_sw_icm_alloc_blocks;
+ unsigned long *header_modify_pattern_sw_icm_alloc_blocks;
+};
+
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
+{
+ u64 header_modify_pattern_icm_blocks = 0;
+ u64 header_modify_icm_blocks = 0;
+ u64 steering_icm_blocks = 0;
+ struct mlx5_dm *dm;
+ bool support_v2;
+
+ if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
+ return NULL;
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&dm->lock);
+
+ if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) {
+ steering_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->steering_sw_icm_alloc_blocks =
+ bitmap_zalloc(steering_icm_blocks, GFP_KERNEL);
+ if (!dm->steering_sw_icm_alloc_blocks)
+ goto err_steering;
+ }
+
+ if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) {
+ header_modify_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->header_modify_sw_icm_alloc_blocks =
+ bitmap_zalloc(header_modify_icm_blocks, GFP_KERNEL);
+ if (!dm->header_modify_sw_icm_alloc_blocks)
+ goto err_modify_hdr;
+ }
+
+ support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) &&
+ MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address);
+
+ if (support_v2) {
+ header_modify_pattern_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->header_modify_pattern_sw_icm_alloc_blocks =
+ bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL);
+ if (!dm->header_modify_pattern_sw_icm_alloc_blocks)
+ goto err_pattern;
+ }
+
+ return dm;
+
+err_pattern:
+ bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+
+err_modify_hdr:
+ bitmap_free(dm->steering_sw_icm_alloc_blocks);
+
+err_steering:
+ kfree(dm);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_dm *dm = dev->dm;
+
+ if (!dev->dm)
+ return;
+
+ if (dm->steering_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ bitmap_free(dm->steering_sw_icm_alloc_blocks);
+ }
+
+ if (dm->header_modify_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+ }
+
+ if (dm->header_modify_pattern_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ bitmap_free(dm->header_modify_pattern_sw_icm_alloc_blocks);
+ }
+
+ kfree(dm);
+}
+
+int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+ u64 length, u32 log_alignment, u16 uid,
+ phys_addr_t *addr, u32 *obj_id)
+{
+ u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
+ struct mlx5_dm *dm = dev->dm;
+ unsigned long *block_map;
+ u64 icm_start_addr;
+ u32 log_icm_size;
+ u64 align_mask;
+ u32 max_blocks;
+ u64 block_idx;
+ void *sw_icm;
+ int ret;
+
+ if (!dev->dm)
+ return -EOPNOTSUPP;
+
+ if (!length || (length & (length - 1)) ||
+ length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1))
+ return -EINVAL;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+ MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+ switch (type) {
+ case MLX5_SW_ICM_TYPE_STEERING:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
+ block_map = dm->steering_sw_icm_alloc_blocks;
+ break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_sw_icm_size);
+ block_map = dm->header_modify_sw_icm_alloc_blocks;
+ break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_pattern_sw_icm_size);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!block_map)
+ return -EOPNOTSUPP;
+
+ max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ if (log_alignment < MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+ log_alignment = MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+ align_mask = BIT(log_alignment - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) - 1;
+
+ spin_lock(&dm->lock);
+ block_idx = bitmap_find_next_zero_area(block_map, max_blocks, 0,
+ num_blocks, align_mask);
+
+ if (block_idx < max_blocks)
+ bitmap_set(block_map,
+ block_idx, num_blocks);
+
+ spin_unlock(&dm->lock);
+
+ if (block_idx >= max_blocks)
+ return -ENOMEM;
+
+ sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
+ icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+ MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
+ icm_start_addr);
+ MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ spin_lock(&dm->lock);
+ bitmap_clear(block_map,
+ block_idx, num_blocks);
+ spin_unlock(&dm->lock);
+
+ return ret;
+ }
+
+ *addr = icm_start_addr;
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc);
+
+int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+ u64 length, u16 uid, phys_addr_t addr, u32 obj_id)
+{
+ u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ struct mlx5_dm *dm = dev->dm;
+ unsigned long *block_map;
+ u64 icm_start_addr;
+ u64 start_idx;
+ int err;
+
+ if (!dev->dm)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case MLX5_SW_ICM_TYPE_STEERING:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+ block_map = dm->steering_sw_icm_alloc_blocks;
+ break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+ block_map = dm->header_modify_sw_icm_alloc_blocks;
+ break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+ header_modify_pattern_sw_icm_start_address);
+ block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+ spin_lock(&dm->lock);
+ bitmap_clear(block_map,
+ start_idx, num_blocks);
+ spin_unlock(&dm->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc);
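
A sketch of one allocate/free round trip through the exported SW ICM helpers above. The 64 KiB length, zero uid, and zero log_alignment are illustrative assumptions; the length must be a power of two that the device's SW ICM block size divides.

/* Hypothetical caller: carve one chunk out of the steering SW ICM area
 * and release it again.
 */
static int example_sw_icm_roundtrip(struct mlx5_core_dev *dev)
{
	u64 len = 64 * 1024;	/* power of two, block-size aligned */
	phys_addr_t addr;
	u32 obj_id;
	int err;

	err = mlx5_dm_sw_icm_alloc(dev, MLX5_SW_ICM_TYPE_STEERING, len,
				   0 /* log_alignment: fall back to block size */,
				   0 /* uid */, &addr, &obj_id);
	if (err)
		return err;

	/* ... use the ICM range [addr, addr + len) ... */

	return mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING, len,
				      0 /* uid */, addr, obj_id);
}
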
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644
index 000000000..d3d628b86
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018-2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+ struct list_head list;
+ struct list_head process_list;
+ struct tasklet_struct task;
+ spinlock_t lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+ spinlock_t lock; /* protect radix tree */
+ struct radix_tree_root tree;
+};
+
+struct mlx5_eq {
+ struct mlx5_frag_buf_ctrl fbc;
+ struct mlx5_frag_buf frag_buf;
+ struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
+ __be32 __iomem *doorbell;
+ u32 cons_index;
+ unsigned int vecidx;
+ unsigned int irqn;
+ u8 eqn;
+ struct mlx5_rsc_debug *dbg;
+ struct mlx5_irq *irq;
+};
+
+struct mlx5_eq_async {
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
+ spinlock_t lock; /* To avoid irq EQ handler races with resiliency flows */
+};
+
+struct mlx5_eq_comp {
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
+ struct mlx5_eq_tasklet tasklet_ctx;
+ struct list_head list;
+};
+
+static inline u32 eq_get_size(struct mlx5_eq *eq)
+{
+ return eq->fbc.sz_m1 + 1;
+}
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+ return mlx5_frag_buf_get_wqe(&eq->fbc, entry);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1);
+
+ return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn);
+
+#endif
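
The owner-bit and doorbell helpers above imply a consumer loop of the following shape; the dispatch step and the decision to re-arm are illustrative assumptions, not code from this header.

/* Hypothetical polling loop built only from the inline helpers above:
 * consume EQEs while the owner bit marks them valid, then ring the
 * consumer-index doorbell and re-arm the EQ.
 */
static void example_poll_eq(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;

	while ((eqe = next_eqe_sw(eq))) {
		/* Read the EQE body only after the ownership check */
		dma_rmb();

		/* ... dispatch on eqe->type ... */

		++eq->cons_index;
	}

	eq_update_ci(eq, 1 /* arm */);
}
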
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
new file mode 100644
index 000000000..df58cba37
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -0,0 +1,810 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies.
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/fs.h>
+
+#include "lib/fs_chains.h"
+#include "fs_ft_pool.h"
+#include "en/mapping.h"
+#include "fs_core.h"
+#include "en_tc.h"
+
+#define chains_lock(chains) ((chains)->lock)
+#define chains_ht(chains) ((chains)->chains_ht)
+#define prios_ht(chains) ((chains)->prios_ht)
+#define tc_default_ft(chains) ((chains)->tc_default_ft)
+#define tc_end_ft(chains) ((chains)->tc_end_ft)
+#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO)
+#define FT_TBL_SZ (64 * 1024)
+
+struct mlx5_fs_chains {
+ struct mlx5_core_dev *dev;
+
+ struct rhashtable chains_ht;
+ struct rhashtable prios_ht;
+ /* Protects above chains_ht and prios_ht */
+ struct mutex lock;
+
+ struct mlx5_flow_table *tc_default_ft;
+ struct mlx5_flow_table *tc_end_ft;
+ struct mapping_ctx *chains_mapping;
+
+ enum mlx5_flow_namespace_type ns;
+ u32 group_num;
+ u32 flags;
+};
+
+struct fs_chain {
+ struct rhash_head node;
+
+ u32 chain;
+
+ int ref;
+ int id;
+
+ struct mlx5_fs_chains *chains;
+ struct list_head prios_list;
+ struct mlx5_flow_handle *restore_rule;
+ struct mlx5_modify_hdr *miss_modify_hdr;
+};
+
+struct prio_key {
+ u32 chain;
+ u32 prio;
+ u32 level;
+};
+
+struct prio {
+ struct rhash_head node;
+ struct list_head list;
+
+ struct prio_key key;
+
+ int ref;
+
+ struct fs_chain *chain;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_handle *miss_rule;
+};
+
+static const struct rhashtable_params chain_params = {
+ .head_offset = offsetof(struct fs_chain, node),
+ .key_offset = offsetof(struct fs_chain, chain),
+ .key_len = sizeof_field(struct fs_chain, chain),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params prio_params = {
+ .head_offset = offsetof(struct prio, node),
+ .key_offset = offsetof(struct prio, key),
+ .key_len = sizeof_field(struct prio, key),
+ .automatic_shrinking = true,
+};
+
+bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains)
+{
+ return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+}
+
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{
+ return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+}
+
+bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains)
+{
+ return mlx5_chains_prios_supported(chains) &&
+ mlx5_chains_ignore_flow_level_supported(chains);
+}
+
+u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains)
+{
+ if (!mlx5_chains_prios_supported(chains))
+ return 1;
+
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX - 1;
+
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_CHAIN;
+}
+
+u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
+{
+ return mlx5_chains_get_chain_range(chains) + 1;
+}
+
+u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+{
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
+ if (!chains->dev->priv.eswitch ||
+ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
+ return 1;
+
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_PRIO;
+}
+
+static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains)
+{
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
+ /* Same value for FDB and NIC RX tables */
+ return FDB_TC_LEVELS_PER_PRIO;
+}
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft)
+{
+ tc_end_ft(chains) = ft;
+}
+
+static struct mlx5_flow_table *
+mlx5_chains_create_table(struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ int sz;
+
+ if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED)
+ ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE;
+ ft_attr.max_fte = sz;
+
+ /* We use tc_default_ft(chains) as the table's next_ft until
+ * ignore_flow_level is allowed on FT creation and not just for FTEs.
+ * Instead, the caller should add an explicit miss rule if needed.
+ */
+ ft_attr.next_ft = tc_default_ft(chains);
+
+ /* The root table (chain 0, prio 1, level 0) is required to be
+ * connected to the previous fs_core managed prio.
+ * We always create it, as a managed table, in order to align with
+ * fs_core logic.
+ */
+ if (!mlx5_chains_ignore_flow_level_supported(chains) ||
+ (chain == 0 && prio == 1 && level == 0)) {
+ ft_attr.level = level;
+ ft_attr.prio = prio - 1;
+ ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
+ mlx5_get_fdb_sub_ns(chains->dev, chain) :
+ mlx5_get_flow_namespace(chains->dev, chains->ns);
+ } else {
+ ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = ns_to_chains_fs_prio(chains->ns);
+ /* Firmware doesn't allow us to create another level 0 table,
+ * so we create all unmanaged tables as level 1.
+ *
+ * To connect them, we use explicit miss rules with
+ * ignore_flow_level. The caller is responsible for creating
+ * these rules (if needed).
+ */
+ ft_attr.level = 1;
+ ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
+ }
+
+ ft_attr.autogroup.num_reserved_entries = 2;
+ ft_attr.autogroup.max_num_groups = chains->group_num;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
+ (int)PTR_ERR(ft), chain, prio, level, sz);
+ return ft;
+ }
+
+ return ft;
+}
+
+static int
+create_chain_restore(struct fs_chain *chain)
+{
+ struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_fs_chains *chains = chain->chains;
+ enum mlx5e_tc_attr_to_reg chain_to_reg;
+ struct mlx5_modify_hdr *mod_hdr;
+ u32 index;
+ int err;
+
+ if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) ||
+ !mlx5_chains_prios_supported(chains))
+ return 0;
+
+ err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
+ if (err)
+ return err;
+ if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
+ /* we got the special default flow tag id, so we won't know
+ * if we actually marked the packet with the restore rule
+ * we create.
+ *
+ * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
+ */
+ err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
+ mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG);
+ if (err)
+ return err;
+ }
+
+ chain->id = index;
+
+ if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) {
+ chain_to_reg = CHAIN_TO_REG;
+ chain->restore_rule = esw_add_restore_rule(esw, chain->id);
+ if (IS_ERR(chain->restore_rule)) {
+ err = PTR_ERR(chain->restore_rule);
+ goto err_rule;
+ }
+ } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) {
+ /* For NIC RX we don't need a restore rule
+ * since we write the metadata to reg_b
+ * that is passed to SW directly.
+ */
+ chain_to_reg = NIC_CHAIN_TO_REG;
+ } else {
+ err = -EINVAL;
+ goto err_rule;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield);
+ MLX5_SET(set_action_in, modact, offset,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset);
+ MLX5_SET(set_action_in, modact, length,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ?
+ 0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen);
+ MLX5_SET(set_action_in, modact, data, chain->id);
+ mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns,
+ 1, modact);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ goto err_mod_hdr;
+ }
+ chain->miss_modify_hdr = mod_hdr;
+
+ return 0;
+
+err_mod_hdr:
+ if (!IS_ERR_OR_NULL(chain->restore_rule))
+ mlx5_del_flow_rules(chain->restore_rule);
+err_rule:
+ /* Datapath can't find this mapping, so we can safely remove it */
+ mapping_remove(chains->chains_mapping, chain->id);
+ return err;
+}
+
+static void destroy_chain_restore(struct fs_chain *chain)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+
+ if (!chain->miss_modify_hdr)
+ return;
+
+ if (chain->restore_rule)
+ mlx5_del_flow_rules(chain->restore_rule);
+
+ mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr);
+ mapping_remove(chains->chains_mapping, chain->id);
+}
+
+static struct fs_chain *
+mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+ struct fs_chain *chain_s = NULL;
+ int err;
+
+ chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL);
+ if (!chain_s)
+ return ERR_PTR(-ENOMEM);
+
+ chain_s->chains = chains;
+ chain_s->chain = chain;
+ INIT_LIST_HEAD(&chain_s->prios_list);
+
+ err = create_chain_restore(chain_s);
+ if (err)
+ goto err_restore;
+
+ err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node,
+ chain_params);
+ if (err)
+ goto err_insert;
+
+ return chain_s;
+
+err_insert:
+ destroy_chain_restore(chain_s);
+err_restore:
+ kvfree(chain_s);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_chain(struct fs_chain *chain)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+
+ rhashtable_remove_fast(&chains_ht(chains), &chain->node,
+ chain_params);
+
+ destroy_chain_restore(chain);
+ kvfree(chain);
+}
+
+static struct fs_chain *
+mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+ struct fs_chain *chain_s;
+
+ chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain,
+ chain_params);
+ if (!chain_s) {
+ chain_s = mlx5_chains_create_chain(chains, chain);
+ if (IS_ERR(chain_s))
+ return chain_s;
+ }
+
+ chain_s->ref++;
+
+ return chain_s;
+}
+
+static struct mlx5_flow_handle *
+mlx5_chains_add_miss_rule(struct fs_chain *chain,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act act = {};
+
+ act.flags = FLOW_ACT_NO_APPEND;
+ if (mlx5_chains_ignore_flow_level_supported(chain->chains))
+ act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+
+ act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = next_ft;
+
+ if (next_ft == tc_end_ft(chains) &&
+ chain->chain != mlx5_chains_get_nf_ft_chain(chains) &&
+ mlx5_chains_prios_supported(chains)) {
+ act.modify_hdr = chain->miss_modify_hdr;
+ act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+
+ return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
+}
+
+static int
+mlx5_chains_update_prio_prevs(struct prio *prio,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
+ struct fs_chain *chain = prio->chain;
+ struct prio *pos;
+ int n = 0, err;
+
+ if (prio->key.level)
+ return 0;
+
+ /* Iterate in reverse order until reaching the level 0 rule of
+ * the previous priority, adding all the miss rules first, so we can
+ * revert them if any of them fails.
+ */
+ pos = prio;
+ list_for_each_entry_continue_reverse(pos,
+ &chain->prios_list,
+ list) {
+ miss_rules[n] = mlx5_chains_add_miss_rule(chain,
+ pos->ft,
+ next_ft);
+ if (IS_ERR(miss_rules[n])) {
+ err = PTR_ERR(miss_rules[n]);
+ goto err_prev_rule;
+ }
+
+ n++;
+ if (!pos->key.level)
+ break;
+ }
+
+ /* Success, delete old miss rules, and update the pointers. */
+ n = 0;
+ pos = prio;
+ list_for_each_entry_continue_reverse(pos,
+ &chain->prios_list,
+ list) {
+ mlx5_del_flow_rules(pos->miss_rule);
+
+ pos->miss_rule = miss_rules[n];
+ pos->next_ft = next_ft;
+
+ n++;
+ if (!pos->key.level)
+ break;
+ }
+
+ return 0;
+
+err_prev_rule:
+ while (--n >= 0)
+ mlx5_del_flow_rules(miss_rules[n]);
+
+ return err;
+}
+
+static void
+mlx5_chains_put_chain(struct fs_chain *chain)
+{
+ if (--chain->ref == 0)
+ mlx5_chains_destroy_chain(chain);
+}
+
+static struct prio *
+mlx5_chains_create_prio(struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_table *ft;
+ struct fs_chain *chain_s;
+ struct list_head *pos;
+ struct prio *prio_s;
+ u32 *flow_group_in;
+ int err;
+
+ chain_s = mlx5_chains_get_chain(chains, chain);
+ if (IS_ERR(chain_s))
+ return ERR_CAST(chain_s);
+
+ prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL);
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!prio_s || !flow_group_in) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ /* Chain's prio list is sorted by prio and level.
+ * And all levels of some prio point to the next prio's level 0.
+ * Example list (prio, level):
+ * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
+ * In hardware, we will have the following pointers:
+ * (3,0) -> (5,0) -> (7,0) -> Slow path
+ * (3,1) -> (5,0)
+ * (5,1) -> (7,0)
+ * (6,1) -> (7,0)
+ */
+
+ /* Default miss for each chain: */
+ next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
+ tc_default_ft(chains) :
+ tc_end_ft(chains);
+ list_for_each(pos, &chain_s->prios_list) {
+ struct prio *p = list_entry(pos, struct prio, list);
+
+ /* exit on first pos that is larger */
+ if (prio < p->key.prio || (prio == p->key.prio &&
+ level < p->key.level)) {
+ /* Get next level 0 table */
+ next_ft = p->key.level == 0 ? p->ft : p->next_ft;
+ break;
+ }
+ }
+
+ ft = mlx5_chains_create_table(chains, chain, prio, level);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_create;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
+ ft->max_fte - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ft->max_fte - 1);
+ miss_group = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(miss_group)) {
+ err = PTR_ERR(miss_group);
+ goto err_group;
+ }
+
+ /* Add miss rule to next_ft */
+ miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft);
+ if (IS_ERR(miss_rule)) {
+ err = PTR_ERR(miss_rule);
+ goto err_miss_rule;
+ }
+
+ prio_s->miss_group = miss_group;
+ prio_s->miss_rule = miss_rule;
+ prio_s->next_ft = next_ft;
+ prio_s->chain = chain_s;
+ prio_s->key.chain = chain;
+ prio_s->key.prio = prio;
+ prio_s->key.level = level;
+ prio_s->ft = ft;
+
+ err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node,
+ prio_params);
+ if (err)
+ goto err_insert;
+
+ list_add(&prio_s->list, pos->prev);
+
+ /* Table is ready, connect it */
+ err = mlx5_chains_update_prio_prevs(prio_s, ft);
+ if (err)
+ goto err_update;
+
+ kvfree(flow_group_in);
+ return prio_s;
+
+err_update:
+ list_del(&prio_s->list);
+ rhashtable_remove_fast(&prios_ht(chains), &prio_s->node,
+ prio_params);
+err_insert:
+ mlx5_del_flow_rules(miss_rule);
+err_miss_rule:
+ mlx5_destroy_flow_group(miss_group);
+err_group:
+ mlx5_destroy_flow_table(ft);
+err_create:
+err_alloc:
+ kvfree(prio_s);
+ kvfree(flow_group_in);
+ mlx5_chains_put_chain(chain_s);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains,
+ struct prio *prio)
+{
+ struct fs_chain *chain = prio->chain;
+
+ WARN_ON(mlx5_chains_update_prio_prevs(prio,
+ prio->next_ft));
+
+ list_del(&prio->list);
+ rhashtable_remove_fast(&prios_ht(chains), &prio->node,
+ prio_params);
+ mlx5_del_flow_rules(prio->miss_rule);
+ mlx5_destroy_flow_group(prio->miss_group);
+ mlx5_destroy_flow_table(prio->ft);
+ mlx5_chains_put_chain(chain);
+ kvfree(prio);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level)
+{
+ struct mlx5_flow_table *prev_fts;
+ struct prio *prio_s;
+ struct prio_key key;
+ int l = 0;
+
+ if ((chain > mlx5_chains_get_chain_range(chains) &&
+ chain != mlx5_chains_get_nf_ft_chain(chains)) ||
+ prio > mlx5_chains_get_prio_range(chains) ||
+ level > mlx5_chains_get_level_range(chains))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /* create earlier levels for correct fs_core lookup when
+ * connecting tables.
+ */
+ for (l = 0; l < level; l++) {
+ prev_fts = mlx5_chains_get_table(chains, chain, prio, l);
+ if (IS_ERR(prev_fts)) {
+ prio_s = ERR_CAST(prev_fts);
+ goto err_get_prevs;
+ }
+ }
+
+ key.chain = chain;
+ key.prio = prio;
+ key.level = level;
+
+ mutex_lock(&chains_lock(chains));
+ prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+ prio_params);
+ if (!prio_s) {
+ prio_s = mlx5_chains_create_prio(chains, chain,
+ prio, level);
+ if (IS_ERR(prio_s))
+ goto err_create_prio;
+ }
+
+ ++prio_s->ref;
+ mutex_unlock(&chains_lock(chains));
+
+ return prio_s->ft;
+
+err_create_prio:
+ mutex_unlock(&chains_lock(chains));
+err_get_prevs:
+ while (--l >= 0)
+ mlx5_chains_put_table(chains, chain, prio, l);
+ return ERR_CAST(prio_s);
+}
+
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level)
+{
+ struct prio *prio_s;
+ struct prio_key key;
+
+ key.chain = chain;
+ key.prio = prio;
+ key.level = level;
+
+ mutex_lock(&chains_lock(chains));
+ prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+ prio_params);
+ if (!prio_s)
+ goto err_get_prio;
+
+ if (--prio_s->ref == 0)
+ mlx5_chains_destroy_prio(chains, prio_s);
+ mutex_unlock(&chains_lock(chains));
+
+ while (level-- > 0)
+ mlx5_chains_put_table(chains, chain, prio, level);
+
+ return;
+
+err_get_prio:
+ mutex_unlock(&chains_lock(chains));
+ WARN_ONCE(1,
+ "Couldn't find table: (chain: %d prio: %d level: %d)",
+ chain, prio, level);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains)
+{
+ return tc_end_ft(chains);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains)
+{
+ u32 chain, prio, level;
+ int err;
+
+ if (!mlx5_chains_ignore_flow_level_supported(chains)) {
+ err = -EOPNOTSUPP;
+
+ mlx5_core_warn(chains->dev,
+ "Couldn't create global flow table, ignore_flow_level not supported.");
+ goto err_ignore;
+ }
+
+ chain = mlx5_chains_get_chain_range(chains);
+ prio = mlx5_chains_get_prio_range(chains);
+ level = mlx5_chains_get_level_range(chains);
+
+ return mlx5_chains_create_table(chains, chain, prio, level);
+
+err_ignore:
+ return ERR_PTR(err);
+}
+
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft)
+{
+ mlx5_destroy_flow_table(ft);
+}
+
+static struct mlx5_fs_chains *
+mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+ struct mlx5_fs_chains *chains_priv;
+ u32 max_flow_counter;
+ int err;
+
+ chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
+ if (!chains_priv)
+ return ERR_PTR(-ENOMEM);
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ mlx5_core_dbg(dev,
+ "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n",
+ max_flow_counter, attr->max_grp_num, attr->max_ft_sz);
+
+ chains_priv->dev = dev;
+ chains_priv->flags = attr->flags;
+ chains_priv->ns = attr->ns;
+ chains_priv->group_num = attr->max_grp_num;
+ chains_priv->chains_mapping = attr->mapping;
+ tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft;
+
+ mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
+ mlx5_chains_get_chain_range(chains_priv),
+ mlx5_chains_get_prio_range(chains_priv));
+
+ err = rhashtable_init(&chains_ht(chains_priv), &chain_params);
+ if (err)
+ goto init_chains_ht_err;
+
+ err = rhashtable_init(&prios_ht(chains_priv), &prio_params);
+ if (err)
+ goto init_prios_ht_err;
+
+ mutex_init(&chains_lock(chains_priv));
+
+ return chains_priv;
+
+init_prios_ht_err:
+ rhashtable_destroy(&chains_ht(chains_priv));
+init_chains_ht_err:
+ kfree(chains_priv);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_cleanup(struct mlx5_fs_chains *chains)
+{
+ mutex_destroy(&chains_lock(chains));
+ rhashtable_destroy(&prios_ht(chains));
+ rhashtable_destroy(&chains_ht(chains));
+
+ kfree(chains);
+}
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+ struct mlx5_fs_chains *chains;
+
+ chains = mlx5_chains_init(dev, attr);
+
+ return chains;
+}
+
+void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains)
+{
+ mlx5_chains_cleanup(chains);
+}
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+ u32 *chain_mapping)
+{
+ struct mapping_ctx *ctx = chains->chains_mapping;
+ struct mlx5_mapped_obj mapped_obj = {};
+
+ mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN;
+ mapped_obj.chain = chain;
+ return mapping_add(ctx, &mapped_obj, chain_mapping);
+}
+
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
+{
+ struct mapping_ctx *ctx = chains->chains_mapping;
+
+ return mapping_remove(ctx, chain_mapping);
+}
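
A create/get/put round trip through the chains API above, as a sketch: the kernel (NIC) namespace, the group count, and the chain/prio/level values are illustrative assumptions, and the default miss table and mapping context are assumed to be supplied by the caller.

/* Hypothetical caller: create a chains object, take a reference on the
 * root table (chain 0, prio 1, level 0), then release everything.
 */
static int example_chains_roundtrip(struct mlx5_core_dev *dev,
				    struct mlx5_flow_table *miss_ft,
				    struct mapping_ctx *mapping)
{
	struct mlx5_chains_attr attr = {
		.ns = MLX5_FLOW_NAMESPACE_KERNEL,
		.max_grp_num = 4,
		.default_ft = miss_ft,
		.mapping = mapping,
	};
	struct mlx5_fs_chains *chains;
	struct mlx5_flow_table *ft;
	int err = 0;

	chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(chains))
		return PTR_ERR(chains);

	ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto out;
	}

	/* ... add rules to ft ... */

	mlx5_chains_put_table(chains, 0, 1, 0);
out:
	mlx5_chains_destroy(chains);
	return err;
}
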
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
new file mode 100644
index 000000000..d50bdb226
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_ESW_CHAINS_H__
+#define __ML5_ESW_CHAINS_H__
+
+#include <linux/mlx5/fs.h>
+
+struct mlx5_fs_chains;
+struct mlx5_mapped_obj;
+
+enum mlx5_chains_flags {
+ MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1),
+ MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2),
+};
+
+struct mlx5_chains_attr {
+ enum mlx5_flow_namespace_type ns;
+ u32 flags;
+ u32 max_ft_sz;
+ u32 max_grp_num;
+ struct mlx5_flow_table *default_ft;
+ struct mapping_ctx *mapping;
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+bool
+mlx5_chains_prios_supported(struct mlx5_fs_chains *chains);
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains);
+bool
+mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level);
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level);
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains);
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft);
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+ u32 *chain_mapping);
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains,
+ u32 chain_mapping);
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr);
+void mlx5_chains_destroy(struct mlx5_fs_chains *chains);
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline bool
+mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{ return false; }
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level) {};
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); }
+
+static inline struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{ return NULL; }
+static inline void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains) {};
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __ML5_ESW_CHAINS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
new file mode 100644
index 000000000..b78f2ba25
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS 3
+#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\
+ MLX5_TTC_GROUP2_SIZE +\
+ MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS 3
+#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\
+ MLX5_INNER_TTC_GROUP2_SIZE +\
+ MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+ struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+ struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+ return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ for (i = 0; i < MLX5_NUM_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+ mlx5_del_flow_rules(ttc->rules[i].rule);
+ ttc->rules[i].rule = NULL;
+ }
+ }
+
+ for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+ mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+ ttc->tunnel_rules[i] = NULL;
+ }
+ }
+}
+
+struct mlx5_etype_proto {
+ u16 etype;
+ u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV4] = {
+ .etype = ETH_P_IP,
+ .proto = 0,
+ },
+ [MLX5_TT_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = 0,
+ },
+ [MLX5_TT_ANY] = {
+ .etype = 0,
+ .proto = 0,
+ },
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+ [MLX5_TT_IPV4_GRE] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV6_GRE] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV4_IPIP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV6_IPIP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV4_IPV6] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPV6,
+ },
+ [MLX5_TT_IPV6_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPV6,
+ },
+
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+ return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+ u8 proto_type)
+{
+ switch (proto_type) {
+ case IPPROTO_GRE:
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+ default:
+ return false;
+ }
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5_tunnel_proto_supported_rx(mdev,
+ ttc_tunnel_rules[tt].proto))
+ return true;
+ }
+ return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+ if (ethertype == ETH_P_IP)
+ return 4;
+
+ if (ethertype == ETH_P_IPV6)
+ return 6;
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+ int match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+ }
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (match_ipv_outer && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+ } else if (etype) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_flow_handle **trules;
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int tt;
+ int err;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ if (test_bit(tt, params->ignore_dests))
+ continue;
+ rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+ return 0;
+
+ trules = ttc->tunnel_rules;
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (!mlx5_tunnel_proto_supported_rx(dev,
+ ttc_tunnel_rules[tt].proto))
+ continue;
+ if (test_bit(tt, params->ignore_tunnel_dests))
+ continue;
+ trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+ &params->tunnel_dests[tt],
+ ttc_tunnel_rules[tt].etype,
+ ttc_tunnel_rules[tt].proto);
+ if (IS_ERR(trules[tt])) {
+ err = PTR_ERR(trules[tt]);
+ trules[tt] = NULL;
+ goto del_rules;
+ }
+ }
+
+ return 0;
+
+del_rules:
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+ bool use_ipv)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ if (use_ipv)
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype, u8 proto)
+{
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (etype && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+ }
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int err;
+ int tt;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ if (test_bit(tt, params->ignore_dests))
+ continue;
+ rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+ &params->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ return 0;
+
+del_rules:
+
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+ GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_inner_ttc_table_groups(ttc);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ mlx5_cleanup_ttc_rules(ttc);
+ for (i = ttc->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ttc->g[i]))
+ mlx5_destroy_flow_group(ttc->g[i]);
+ ttc->g[i] = NULL;
+ }
+
+ kfree(ttc->g);
+ mlx5_destroy_flow_table(ttc->t);
+ kvfree(ttc);
+}
+
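+/* Build the outer TTC table: create the flow table, then its flow groups and
+ * the per-traffic-type rules. match_ipv_outer reflects whether the device can
+ * match on the outer ip_version field.
+ */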
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ bool match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest)
+{
+ return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+ NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+ WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+ "TTC[%d] default dest is not setup yet", type);
+
+ return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+ return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
new file mode 100644
index 000000000..85fef0cd1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_FS_TTC_H__
+#define __MLX5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+ MLX5_TT_IPV4_TCP,
+ MLX5_TT_IPV6_TCP,
+ MLX5_TT_IPV4_UDP,
+ MLX5_TT_IPV6_UDP,
+ MLX5_TT_IPV4_IPSEC_AH,
+ MLX5_TT_IPV6_IPSEC_AH,
+ MLX5_TT_IPV4_IPSEC_ESP,
+ MLX5_TT_IPV6_IPSEC_ESP,
+ MLX5_TT_IPV4,
+ MLX5_TT_IPV6,
+ MLX5_TT_ANY,
+ MLX5_NUM_TT,
+ MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+ MLX5_TT_IPV4_GRE,
+ MLX5_TT_IPV6_GRE,
+ MLX5_TT_IPV4_IPIP,
+ MLX5_TT_IPV6_IPIP,
+ MLX5_TT_IPV4_IPV6,
+ MLX5_TT_IPV6_IPV6,
+ MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table_attr ft_attr;
+ struct mlx5_flow_destination dests[MLX5_NUM_TT];
+ DECLARE_BITMAP(ignore_dests, MLX5_NUM_TT);
+ bool inner_ttc;
+ DECLARE_BITMAP(ignore_tunnel_dests, MLX5_NUM_TUNNEL_TT);
+ struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
new file mode 100644
index 000000000..6dc83e871
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/kernel.h>
+#include "mlx5_core.h"
+#include "geneve.h"
+
+struct mlx5_geneve {
+ struct mlx5_core_dev *mdev;
+ __be16 opt_class;
+ u8 opt_type;
+ u32 obj_id;
+ struct mutex sync_lock; /* protect GENEVE obj operations */
+ u32 refcount;
+};
+
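+/* Create a GENEVE TLV option general object in FW for the given
+ * class/type/length; returns the object id on success or a negative errno.
+ */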
+static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev,
+ __be16 class,
+ u8 type,
+ u8 len)
+{
+ u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u64 general_obj_types;
+ void *hdr, *opt;
+ u16 obj_id;
+ int err;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT))
+ return -EINVAL;
+
+ hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr);
+ opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt);
+
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+
+ MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class));
+ MLX5_SET(geneve_tlv_option, opt, option_type, type);
+ MLX5_SET(geneve_tlv_option, opt, option_data_length, len);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return obj_id;
+}
+
+static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt)
+{
+ int res = 0;
+
+ if (IS_ERR_OR_NULL(geneve))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&geneve->sync_lock);
+
+ if (geneve->refcount) {
+ if (geneve->opt_class == opt->opt_class &&
+ geneve->opt_type == opt->type) {
+ /* We already have TLV options obj allocated */
+ geneve->refcount++;
+ } else {
+ /* TLV options obj allocated, but its params
+ * do not match the new request.
+ * We support only one such object.
+ */
+ mlx5_core_warn(geneve->mdev,
+ "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n",
+ be16_to_cpu(opt->opt_class),
+ opt->type,
+ opt->length);
+ res = -EOPNOTSUPP;
+ goto unlock;
+ }
+ } else {
+ /* We don't have any TLV options obj allocated */
+
+ res = mlx5_geneve_tlv_option_create(geneve->mdev,
+ opt->opt_class,
+ opt->type,
+ opt->length);
+ if (res < 0) {
+ mlx5_core_warn(geneve->mdev,
+ "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n",
+ be16_to_cpu(opt->opt_class),
+ opt->type, opt->length, res);
+ goto unlock;
+ }
+ geneve->opt_class = opt->opt_class;
+ geneve->opt_type = opt->type;
+ geneve->obj_id = res;
+ geneve->refcount++;
+ res = 0;
+ }
+
+unlock:
+ mutex_unlock(&geneve->sync_lock);
+ return res;
+}
+
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve)
+{
+ if (IS_ERR_OR_NULL(geneve))
+ return;
+
+ mutex_lock(&geneve->sync_lock);
+ if (--geneve->refcount == 0) {
+ /* We've just removed the last user of Geneve option.
+ * Now delete the object in FW.
+ */
+ mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+ geneve->opt_class = 0;
+ geneve->opt_type = 0;
+ geneve->obj_id = 0;
+ }
+ mutex_unlock(&geneve->sync_lock);
+}
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_geneve *geneve =
+ kzalloc(sizeof(*geneve), GFP_KERNEL);
+
+ if (!geneve)
+ return ERR_PTR(-ENOMEM);
+ geneve->mdev = mdev;
+ mutex_init(&geneve->sync_lock);
+
+ return geneve;
+}
+
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve)
+{
+ if (IS_ERR_OR_NULL(geneve))
+ return;
+
+ /* Lockless since we are unloading */
+ if (geneve->refcount)
+ mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+ kfree(geneve);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
new file mode 100644
index 000000000..adee0cbba
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_GENEVE_H__
+#define __MLX5_GENEVE_H__
+
+#include <net/geneve.h>
+#include <linux/mlx5/driver.h>
+
+struct mlx5_geneve;
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev);
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve);
+
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt);
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline struct mlx5_geneve *
+mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; }
+static inline void
+mlx5_geneve_destroy(struct mlx5_geneve *geneve) {}
+static inline int
+mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; }
+static inline void
+mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_GENEVE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
new file mode 100644
index 000000000..96ffc0a0e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+ unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+ ida_init(&dev->roce.reserved_gids.ida);
+ dev->roce.reserved_gids.start = tblsz;
+ dev->roce.reserved_gids.count = 0;
+}
+
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+ WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida));
+ dev->roce.reserved_gids.start = 0;
+ dev->roce.reserved_gids.count = 0;
+ ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+ if (dev->roce.reserved_gids.start < count) {
+ mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
+ count);
+ return -ENOMEM;
+ }
+ if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+ mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count);
+ return -ENOMEM;
+ }
+
+ dev->roce.reserved_gids.start -= count;
+ dev->roce.reserved_gids.count += count;
+ mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+ dev->roce.reserved_gids.count,
+ dev->roce.reserved_gids.start);
+ return 0;
+}
+
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+ WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+ count, dev->roce.reserved_gids.count);
+
+ dev->roce.reserved_gids.start += count;
+ dev->roce.reserved_gids.count -= count;
+ mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+ dev->roce.reserved_gids.count,
+ dev->roce.reserved_gids.start);
+}
+
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+ int end = dev->roce.reserved_gids.start +
+ dev->roce.reserved_gids.count - 1;
+ int index = 0;
+
+ index = ida_alloc_range(&dev->roce.reserved_gids.ida,
+ dev->roce.reserved_gids.start, end,
+ GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index);
+ *gid_index = index;
+ return 0;
+}
+
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+ mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+ ida_free(&dev->roce.reserved_gids.ida, gid_index);
+}
+
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+ return dev->roce.reserved_gids.count;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
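+/* Program one entry of the device RoCE address table via the
+ * SET_ROCE_ADDRESS command; when gid is NULL the MAC and L3 address
+ * fields are left zeroed.
+ */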
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+ u8 roce_version, u8 roce_l3_type, const u8 *gid,
+ const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {};
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_l3_address);
+ void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_mac_47_32);
+ int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EINVAL;
+
+ if (gid) {
+ if (vlan) {
+ MLX5_SET_RA(in_addr, vlan_valid, 1);
+ MLX5_SET_RA(in_addr, vlan_id, vlan_id);
+ }
+
+ ether_addr_copy(addr_mac, mac);
+ memcpy(addr_l3_addr, gid, gidsz);
+ }
+ MLX5_SET_RA(in_addr, roce_version, roce_version);
+ MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
+
+ if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0)
+ MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+ return mlx5_cmd_exec_in(dev, set_roce_address, in);
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 000000000..583dc7e2a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
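+/* Read or write a single HV_CONFIG_BLOCK_SIZE_MAX-sized block over the
+ * Hyper-V PCI configuration block interface; offset must be block aligned
+ * and len must equal the block size.
+ */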
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset, bool read)
+{
+ int rc = -EOPNOTSUPP;
+ int bytes_returned;
+ int block_id;
+
+ if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len != HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+ rc = read ?
+ hyperv_read_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+ &bytes_returned) :
+ hyperv_write_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+ /* Make sure len bytes were read successfully */
+ if (read && !rc && len != bytes_returned)
+ rc = -EIO;
+
+ if (rc) {
+ mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+ read ? "read" : "write", rc, len,
+ offset);
+ return rc;
+ }
+
+ return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ return hyperv_reg_block_invalidate(dev->pdev, context,
+ block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+ hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 000000000..f9a45573f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 000000000..4047629a8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+ struct mlx5_core_dev *dev;
+ struct workqueue_struct *work_queue;
+ struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX];
+ struct mutex agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+ struct work_struct invalidate_work;
+ struct mlx5_hv_vhca *hv_vhca;
+ u64 block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+ u16 sequence;
+ u16 offset;
+ u8 reserved[4];
+ u64 data[15];
+};
+
+struct mlx5_hv_vhca_agent {
+ enum mlx5_hv_vhca_agent_type type;
+ struct mlx5_hv_vhca *hv_vhca;
+ void *priv;
+ u16 seq;
+ void (*control)(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block);
+ void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask);
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_hv_vhca *hv_vhca = NULL;
+
+ hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+ if (!hv_vhca)
+ return ERR_PTR(-ENOMEM);
+
+ hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+ if (!hv_vhca->work_queue) {
+ kfree(hv_vhca);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ hv_vhca->dev = dev;
+ mutex_init(&hv_vhca->agents_lock);
+
+ return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ destroy_workqueue(hv_vhca->work_queue);
+ kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+ struct mlx5_hv_vhca_work *hwork;
+ struct mlx5_hv_vhca *hv_vhca;
+ int i;
+
+ hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+ hv_vhca = hwork->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->invalidate)
+ continue;
+
+ if (!(BIT(agent->type) & hwork->block_mask))
+ continue;
+
+ agent->invalidate(agent, hwork->block_mask);
+ }
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+ struct mlx5_hv_vhca_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+ work->hv_vhca = hv_vhca;
+ work->block_mask = block_mask;
+
+ queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
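+/* Capability/control bit of an agent: the control agent (type 0) has no bit,
+ * all other agent types map to BIT(type - 1).
+ */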
+#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
+
+static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->control)
+ continue;
+
+ if (!(AGENT_MASK(agent->type) & block->control))
+ continue;
+
+ agent->control(agent, block);
+ }
+}
+
+static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
+ u32 *capabilities)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (agent)
+ *capabilities |= AGENT_MASK(agent->type);
+ }
+}
+
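+/* Control agent invalidate handler: re-read the control block, advertise the
+ * aggregated agent capabilities, dispatch the control bits to the registered
+ * agents, ack the command and write the block back.
+ */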
+static void
+mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+ struct mlx5_core_dev *dev = hv_vhca->dev;
+ struct mlx5_hv_vhca_control_block *block;
+ u32 capabilities = 0;
+ int err;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return;
+
+ err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
+ if (err)
+ goto free_block;
+
+ mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
+
+	/* If there are no capabilities, send back an empty block */
+ if (!capabilities) {
+ memset(block, 0, sizeof(*block));
+ goto write;
+ }
+
+ if (block->capabilities != capabilities)
+ block->capabilities = capabilities;
+
+ if (block->control & ~capabilities)
+ goto free_block;
+
+ mlx5_hv_vhca_agents_control(hv_vhca, block);
+ block->command_ack = block->command;
+
+write:
+ mlx5_hv_write_config(dev, block, sizeof(*block), 0);
+
+free_block:
+ kfree(block);
+}
+
+static struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
+{
+ return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
+ NULL,
+ mlx5_hv_vhca_control_agent_invalidate,
+ NULL, NULL);
+}
+
+static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ mlx5_hv_vhca_agent_destroy(agent);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ struct mlx5_hv_vhca_agent *agent;
+ int err;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return IS_ERR_OR_NULL(hv_vhca);
+
+ err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+ mlx5_hv_vhca_invalidate);
+ if (err)
+ return err;
+
+ agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
+ if (IS_ERR_OR_NULL(agent)) {
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+ return IS_ERR_OR_NULL(agent);
+ }
+
+ hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
+
+ return 0;
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+ struct mlx5_hv_vhca_agent *agent;
+ int i;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
+ if (agent)
+ mlx5_hv_vhca_control_agent_destroy(agent);
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+ WARN_ON(hv_vhca->agents[i]);
+
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
+{
+ mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *priv)
+{
+ struct mlx5_hv_vhca_agent *agent;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return ERR_PTR(-ENOMEM);
+
+ if (type >= MLX5_HV_VHCA_AGENT_MAX)
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&hv_vhca->agents_lock);
+ if (hv_vhca->agents[type]) {
+ mutex_unlock(&hv_vhca->agents_lock);
+ return ERR_PTR(-EINVAL);
+ }
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+ if (!agent)
+ return ERR_PTR(-ENOMEM);
+
+ agent->type = type;
+ agent->hv_vhca = hv_vhca;
+ agent->priv = priv;
+ agent->control = control;
+ agent->invalidate = invalidate;
+	agent->cleanup    = cleanup;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ hv_vhca->agents[type] = agent;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ mlx5_hv_vhca_agents_update(hv_vhca);
+
+ return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+
+ if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+ mutex_unlock(&hv_vhca->agents_lock);
+ return;
+ }
+
+ hv_vhca->agents[agent->type] = NULL;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ if (agent->cleanup)
+ agent->cleanup(agent);
+
+ kfree(agent);
+
+ mlx5_hv_vhca_agents_update(hv_vhca);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_data_block *data_block,
+ void *src, int len, int *offset)
+{
+ int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+ data_block->sequence = agent->seq;
+ data_block->offset = (*offset)++;
+ memcpy(data_block->data, src, bytes);
+
+ return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+ agent->seq++;
+}
+
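+/* Write a buffer to the agent's config block area: the data is split into
+ * data blocks carrying the agent sequence number and a running block offset,
+ * each written at the config block slot derived from the agent type.
+ */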
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len)
+{
+ int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+ int block_offset = 0;
+ int total = 0;
+ int err;
+
+ while (len) {
+ struct mlx5_hv_vhca_data_block data_block = {0};
+ int bytes;
+
+ bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+ buf + total,
+ len, &block_offset);
+ if (!bytes)
+ return -ENOMEM;
+
+ err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+ sizeof(data_block), offset);
+ if (err)
+ return err;
+
+ total += bytes;
+ len -= bytes;
+ }
+
+ mlx5_hv_vhca_agent_seq_update(agent);
+
+ return 0;
+}
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+ return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 000000000..f240ffe51
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+ MLX5_HV_VHCA_AGENT_CONTROL = 0,
+ MLX5_HV_VHCA_AGENT_STATS = 1,
+ MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+struct mlx5_hv_vhca_control_block {
+ u32 capabilities;
+ u32 control;
+ u16 command;
+ u16 command_ack;
+ u16 version;
+ u16 rings;
+ u32 reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+ u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
new file mode 100644
index 000000000..032adb21a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+#include "mlx5_core.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+int mlx5_crdump_enable(struct mlx5_core_dev *dev);
+void mlx5_crdump_disable(struct mlx5_core_dev *dev);
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
+
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF
+
+enum port_module_event_status_type {
+ MLX5_MODULE_STATUS_PLUGGED = 0x1,
+ MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+ MLX5_MODULE_STATUS_ERROR = 0x3,
+ MLX5_MODULE_STATUS_DISABLED = 0x4,
+ MLX5_MODULE_STATUS_NUM,
+};
+
+enum port_module_event_error_type {
+ MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0,
+ MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1,
+ MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2,
+ MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3,
+ MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
+ MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5,
+ MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
+ MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7,
+ MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc,
+ MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+ u64 status_counters[MLX5_MODULE_STATUS_NUM];
+ u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
+/* Crypto */
+enum {
+ MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS,
+ MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC,
+ MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC,
+};
+
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+ void *key, u32 sz_bytes,
+ u32 key_type, u32 *p_key_id);
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id);
+
+static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
+{
+ return devlink_net(priv_to_devlink(dev));
+}
+
+static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+ mdev->mlx5e_res.uplink_netdev = netdev;
+}
+
+static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev)
+{
+ return mdev->mlx5e_res.uplink_netdev;
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
new file mode 100644
index 000000000..8ff16318e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/mpfs.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_core.h"
+#include "lib/mpfs.h"
+
+/* HW L2 Table (MPFS) management */
+static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
+{
+ u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {};
+ u8 *in_mac_addr;
+
+ MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
+ MLX5_SET(set_l2_table_entry_in, in, table_index, index);
+
+ in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
+ ether_addr_copy(&in_mac_addr[2], mac);
+
+ return mlx5_cmd_exec_in(dev, set_l2_table_entry, in);
+}
+
+static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {};
+
+ MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
+ return mlx5_cmd_exec_in(dev, delete_l2_table_entry, in);
+}
+
+/* UC L2 table hash node */
+struct l2table_node {
+ struct l2addr_node node;
+ u32 index; /* index in HW l2 table */
+ int ref_count;
+};
+
+struct mlx5_mpfs {
+ struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE];
+ struct mutex lock; /* Synchronize l2 table access */
+ u32 size;
+ unsigned long *bitmap;
+};
+
+static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix)
+{
+ int err = 0;
+
+ *ix = find_first_zero_bit(l2table->bitmap, l2table->size);
+ if (*ix >= l2table->size)
+ err = -ENOSPC;
+ else
+ __set_bit(*ix, l2table->bitmap);
+
+ return err;
+}
+
+static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix)
+{
+ __clear_bit(ix, l2table->bitmap);
+}
+
+int mlx5_mpfs_init(struct mlx5_core_dev *dev)
+{
+ int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
+ struct mlx5_mpfs *mpfs;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return 0;
+
+ mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
+ if (!mpfs)
+ return -ENOMEM;
+
+ mutex_init(&mpfs->lock);
+ mpfs->size = l2table_size;
+ mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL);
+ if (!mpfs->bitmap) {
+ kfree(mpfs);
+ return -ENOMEM;
+ }
+
+ dev->priv.mpfs = mpfs;
+ return 0;
+}
+
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+
+ if (!mpfs)
+ return;
+
+ WARN_ON(!hlist_empty(mpfs->hash));
+ bitmap_free(mpfs->bitmap);
+ kfree(mpfs);
+}
+
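+/* Reference-counted add of a unicast MAC to the HW L2 (MPFS) table: the HW
+ * entry is programmed only on the first reference for a given MAC.
+ */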
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ int err = 0;
+ u32 index;
+
+ if (!mpfs)
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (l2addr) {
+ l2addr->ref_count++;
+ goto out;
+ }
+
+ err = alloc_l2table_index(mpfs, &index);
+ if (err)
+ goto out;
+
+ l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
+ if (!l2addr) {
+ err = -ENOMEM;
+ goto hash_add_err;
+ }
+
+ err = set_l2table_entry_cmd(dev, index, mac);
+ if (err)
+ goto set_table_entry_err;
+
+ l2addr->index = index;
+ l2addr->ref_count = 1;
+
+ mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
+ goto out;
+
+set_table_entry_err:
+ l2addr_hash_del(l2addr);
+hash_add_err:
+ free_l2table_index(mpfs, index);
+out:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_mpfs_add_mac);
+
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ int err = 0;
+ u32 index;
+
+ if (!mpfs)
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (!l2addr) {
+ err = -ENOENT;
+ goto unlock;
+ }
+
+ if (--l2addr->ref_count > 0)
+ goto unlock;
+
+ index = l2addr->index;
+ del_l2table_entry_cmd(dev, index);
+ l2addr_hash_del(l2addr);
+ free_l2table_index(mpfs, index);
+ mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index);
+unlock:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_mpfs_del_mac);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
new file mode 100644
index 000000000..4a293542a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_MPFS_H__
+#define __MLX5_MPFS_H__
+
+#include <linux/if_ether.h>
+#include <linux/mlx5/device.h>
+
+/* L2 (MAC address based) hash helpers */
+#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
+#define MLX5_L2_ADDR_HASH(addr) (addr[5])
+
+struct l2addr_node {
+ struct hlist_node hlist;
+ u8 addr[ETH_ALEN];
+};
+
+#define for_each_l2hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist)
+
+#define l2addr_hash_find(hash, mac, type) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ bool found = false; \
+ type *ptr = NULL; \
+ \
+ hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \
+ if (ether_addr_equal(ptr->node.addr, mac)) {\
+ found = true; \
+ break; \
+ } \
+ if (!found) \
+ ptr = NULL; \
+ ptr; \
+})
+
+#define l2addr_hash_add(hash, mac, type, gfp) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ type *ptr = NULL; \
+ \
+ ptr = kzalloc(sizeof(type), gfp); \
+ if (ptr) { \
+ ether_addr_copy(ptr->node.addr, mac); \
+ hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\
+ } \
+ ptr; \
+})
+
+#define l2addr_hash_del(ptr) ({ \
+ hlist_del(&(ptr)->node.hlist); \
+ kfree(ptr); \
+})
+
+#ifdef CONFIG_MLX5_MPFS
+int mlx5_mpfs_init(struct mlx5_core_dev *dev);
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
+#else /* !CONFIG_MLX5_MPFS */
+static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
new file mode 100644
index 000000000..6b774e0c2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/pci.h>
+#include "mlx5_core.h"
+#include "pci_vsc.h"
+
+#define MLX5_EXTRACT_C(source, offset, size) \
+ ((((u32)(source)) >> (offset)) & MLX5_ONES32(size))
+#define MLX5_EXTRACT(src, start, len) \
+ (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len))
+#define MLX5_ONES32(size) \
+ ((size) ? (0xffffffff >> (32 - (size))) : 0)
+#define MLX5_MASK32(offset, size) \
+ (MLX5_ONES32(size) << (offset))
+#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \
+ ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \
+ ((rsrc1) & (~MLX5_MASK32((start), (len)))))
+#define MLX5_MERGE(rsrc1, rsrc2, start, len) \
+ (((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len))
+#define vsc_read(dev, offset, val) \
+ pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define vsc_write(dev, offset, val) \
+ pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define VSC_MAX_RETRIES 2048
+
+enum {
+ VSC_CTRL_OFFSET = 0x4,
+ VSC_COUNTER_OFFSET = 0x8,
+ VSC_SEMAPHORE_OFFSET = 0xc,
+ VSC_ADDR_OFFSET = 0x10,
+ VSC_DATA_OFFSET = 0x14,
+
+ VSC_FLAG_BIT_OFFS = 31,
+ VSC_FLAG_BIT_LEN = 1,
+
+ VSC_SYND_BIT_OFFS = 30,
+ VSC_SYND_BIT_LEN = 1,
+
+ VSC_ADDR_BIT_OFFS = 0,
+ VSC_ADDR_BIT_LEN = 30,
+
+ VSC_SPACE_BIT_OFFS = 0,
+ VSC_SPACE_BIT_LEN = 16,
+
+ VSC_SIZE_VLD_BIT_OFFS = 28,
+ VSC_SIZE_VLD_BIT_LEN = 1,
+
+ VSC_STATUS_BIT_OFFS = 29,
+ VSC_STATUS_BIT_LEN = 3,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ dev->vsc_addr = pci_find_capability(dev->pdev,
+ PCI_CAP_ID_VNDR);
+ if (!dev->vsc_addr)
+ mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n");
+}
+
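+/* Acquire the VSC gateway semaphore: wait for it to read back as free, then
+ * write the free-running counter value and confirm it reads back unchanged.
+ * PCI config space access stays locked until mlx5_vsc_gw_unlock().
+ */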
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
+{
+ u32 counter = 0;
+ int retries = 0;
+ u32 lock_val;
+ int ret;
+
+ pci_cfg_access_lock(dev->pdev);
+ do {
+ if (retries > VSC_MAX_RETRIES) {
+ ret = -EBUSY;
+ goto pci_unlock;
+ }
+
+ /* Check if semaphore is already locked */
+ ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+ if (ret)
+ goto pci_unlock;
+
+ if (lock_val) {
+ retries++;
+ usleep_range(1000, 2000);
+ continue;
+ }
+
+		/* Write the counter value to the semaphore and read it back;
+		 * if the value matches, the semaphore was acquired
+		 * successfully.
+		 */
+ ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter);
+ if (ret)
+ goto pci_unlock;
+
+ ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter);
+ if (ret)
+ goto pci_unlock;
+
+ ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+ if (ret)
+ goto pci_unlock;
+
+ retries++;
+ } while (counter != lock_val);
+
+ return 0;
+
+pci_unlock:
+ pci_cfg_access_unlock(dev->pdev);
+ return ret;
+}
+
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev)
+{
+ int ret;
+
+ ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK);
+ pci_cfg_access_unlock(dev->pdev);
+ return ret;
+}
+
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+ u32 *ret_space_size)
+{
+ int ret;
+ u32 val = 0;
+
+ if (!mlx5_vsc_accessible(dev))
+ return -EINVAL;
+
+ if (ret_space_size)
+ *ret_space_size = 0;
+
+ /* Get a unique val */
+ ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+ if (ret)
+ goto out;
+
+ /* Try to modify the lock */
+ val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN);
+ ret = vsc_write(dev, VSC_CTRL_OFFSET, val);
+ if (ret)
+ goto out;
+
+ /* Verify lock was modified */
+ ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+ if (ret)
+ goto out;
+
+ if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0)
+ return -EINVAL;
+
+ /* Get space max address if indicated by size valid bit */
+ if (ret_space_size &&
+ MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) {
+ ret = vsc_read(dev, VSC_ADDR_OFFSET, &val);
+ if (ret) {
+ mlx5_core_warn(dev, "Failed to get max space size\n");
+ goto out;
+ }
+ *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS,
+ VSC_ADDR_BIT_LEN);
+ }
+ return 0;
+
+out:
+ return ret;
+}
+
+static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val)
+{
+ int retries = 0;
+ u32 flag;
+ int ret;
+
+ do {
+ if (retries > VSC_MAX_RETRIES)
+ return -EBUSY;
+
+ ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag);
+ if (ret)
+ return ret;
+ flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN);
+ retries++;
+
+ if ((retries & 0xf) == 0)
+ usleep_range(1000, 2000);
+
+ } while (flag != expected_val);
+
+ return 0;
+}
+
+static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address,
+ u32 data)
+{
+ int ret;
+
+ if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+ VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+ return -EINVAL;
+
+ /* Set flag to 0x1 */
+ address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1);
+ ret = vsc_write(dev, VSC_DATA_OFFSET, data);
+ if (ret)
+ goto out;
+
+ ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+ if (ret)
+ goto out;
+
+ /* Wait for the flag to be cleared */
+ ret = mlx5_vsc_wait_on_flag(dev, 0);
+
+out:
+ return ret;
+}
+
+static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address,
+ u32 *data)
+{
+ int ret;
+
+ if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+ VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+ return -EINVAL;
+
+ ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+ if (ret)
+ goto out;
+
+ ret = mlx5_vsc_wait_on_flag(dev, 1);
+ if (ret)
+ goto out;
+
+ ret = vsc_read(dev, VSC_DATA_OFFSET, data);
+out:
+ return ret;
+}
+
+static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev,
+ unsigned int read_addr,
+ unsigned int *next_read_addr,
+ u32 *data)
+{
+ int ret;
+
+ ret = mlx5_vsc_gw_read(dev, read_addr, data);
+ if (ret)
+ goto out;
+
+ ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr);
+ if (ret)
+ goto out;
+
+ *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS,
+ VSC_ADDR_BIT_LEN);
+
+ if (*next_read_addr <= read_addr)
+ ret = -EINVAL;
+out:
+ return ret;
+}
+
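+/* Read up to 'length' bytes using the fast-read flow, where HW returns the
+ * next address to read; returns 'length' on success or the offset reached
+ * when a read fails.
+ */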
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+ int length)
+{
+ unsigned int next_read_addr = 0;
+ unsigned int read_addr = 0;
+
+ while (read_addr < length) {
+ if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr,
+ &data[(read_addr >> 2)]))
+ return read_addr;
+
+ read_addr = next_read_addr;
+ }
+ return length;
+}
+
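+/* Lock or unlock the semaphore of a VSC space: write a unique counter-based
+ * id (or MLX5_VSC_UNLOCK) through the semaphore domain and read it back to
+ * confirm ownership.
+ */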
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+ enum mlx5_vsc_state state)
+{
+ u32 data, id = 0;
+ int ret;
+
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL);
+ if (ret) {
+ mlx5_core_warn(dev, "Failed to set gw space %d\n", ret);
+ return ret;
+ }
+
+ if (state == MLX5_VSC_LOCK) {
+ /* Get a unique ID based on the counter */
+ ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id);
+ if (ret)
+ return ret;
+ }
+
+ /* Try to modify lock */
+ ret = mlx5_vsc_gw_write(dev, space, id);
+ if (ret)
+ return ret;
+
+ /* Verify lock was modified */
+ ret = mlx5_vsc_gw_read(dev, space, &data);
+ if (ret)
+ return -EINVAL;
+
+ if (data != id)
+ return -EBUSY;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
new file mode 100644
index 000000000..64272a6d7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#ifndef __MLX5_PCI_VSC_H__
+#define __MLX5_PCI_VSC_H__
+
+enum mlx5_vsc_state {
+ MLX5_VSC_UNLOCK,
+ MLX5_VSC_LOCK,
+};
+
+enum {
+ MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+ u32 *ret_space_size);
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+ int length);
+
+static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev)
+{
+ return !!dev->vsc_addr;
+}
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+ enum mlx5_vsc_state state);
+
+#endif /* __MLX5_PCI_VSC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
new file mode 100644
index 000000000..4571c56ec
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include "mlx5_core.h"
+#include "lib/port_tun.h"
+
+struct mlx5_port_tun_entropy_flags {
+ bool force_supported, force_enabled;
+ bool calc_supported, calc_enabled;
+ bool gre_calc_supported, gre_calc_enabled;
+};
+
+static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev,
+ struct mlx5_port_tun_entropy_flags *entropy_flags)
+{
+	u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+
+	/* Default values for FW that does not support MLX5_REG_PCMR */
+ entropy_flags->force_supported = false;
+ entropy_flags->calc_supported = false;
+ entropy_flags->gre_calc_supported = false;
+ entropy_flags->force_enabled = false;
+ entropy_flags->calc_enabled = true;
+ entropy_flags->gre_calc_enabled = true;
+
+ if (!MLX5_CAP_GEN(mdev, ports_check))
+ return;
+
+ if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+ return;
+
+ entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap));
+ entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap));
+ entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap));
+ entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force));
+ entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc));
+ entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc));
+}
+
+static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable,
+ u8 force)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+ int err;
+
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, entropy_force, force);
+ MLX5_SET(pcmr_reg, in, entropy_calc, enable);
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev,
+ u8 enable, u8 force)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+ int err;
+
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, entropy_force, force);
+ MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable);
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+ struct mlx5_core_dev *mdev)
+{
+ struct mlx5_port_tun_entropy_flags entropy_flags;
+
+ tun_entropy->mdev = mdev;
+ mutex_init(&tun_entropy->lock);
+ mlx5_query_port_tun_entropy(mdev, &entropy_flags);
+ tun_entropy->num_enabling_entries = 0;
+ tun_entropy->num_disabling_entries = 0;
+ tun_entropy->enabled = entropy_flags.calc_supported ?
+ entropy_flags.calc_enabled : true;
+}
+
+static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type, bool enable)
+{
+ struct mlx5_port_tun_entropy_flags entropy_flags;
+ int err;
+
+ mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags);
+ /* Tunnel entropy calculation may be controlled either on port basis
+ * for all tunneling protocols or specifically for GRE protocol.
+ * Prioritize GRE protocol control (if capable) over global port
+ * configuration.
+ */
+ if (entropy_flags.gre_calc_supported &&
+ reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+ if (!entropy_flags.force_supported)
+ return 0;
+ err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev,
+ enable, !enable);
+ if (err)
+ return err;
+ } else if (entropy_flags.calc_supported) {
+		/* Other applications may change the global FW entropy
+		 * calculation settings. Check that the current entropy value
+		 * is the opposite of the requested value.
+		 */
+ if (entropy_flags.force_enabled &&
+ enable == entropy_flags.calc_enabled) {
+ mlx5_core_warn(tun_entropy->mdev,
+ "Unexpected entropy calc setting - expected %d",
+ !entropy_flags.calc_enabled);
+ return -EOPNOTSUPP;
+ }
+		/* GRE requires disabling entropy calculation. If there are
+		 * enabling entries (i.e. VXLAN) we cannot turn it off for
+		 * them, so fail.
+		 */
+ if (tun_entropy->num_enabling_entries)
+ return -EOPNOTSUPP;
+ err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable,
+ entropy_flags.force_supported);
+ if (err)
+ return err;
+ tun_entropy->enabled = enable;
+		/* If we turn on the entropy we no longer need to force it */
+ if (entropy_flags.force_supported && enable) {
+ err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/* This function manages the refcount for enabling/disabling tunnel types.
+ * The return value indicates whether the increment succeeded, depending on
+ * entropy capabilities and configuration.
+ */
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type)
+{
+ int err = -EOPNOTSUPP;
+
+ mutex_lock(&tun_entropy->lock);
+ if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN ||
+ reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) &&
+ tun_entropy->enabled) {
+		/* If entropy calculation is enabled for all tunneling
+		 * types, it is fine for VXLAN, so approve.
+		 * Otherwise keep the default error.
+		 */
+ tun_entropy->num_enabling_entries++;
+ err = 0;
+ } else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+		/* Turn off the entropy only for the first GRE rule.
+		 * For subsequent rules the entropy has already been
+		 * disabled successfully.
+		 */
+ if (tun_entropy->num_disabling_entries == 0)
+ err = mlx5_set_entropy(tun_entropy, reformat_type, 0);
+ else
+ err = 0;
+ if (!err)
+ tun_entropy->num_disabling_entries++;
+ }
+ mutex_unlock(&tun_entropy->lock);
+
+ return err;
+}
+
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type)
+{
+ mutex_lock(&tun_entropy->lock);
+ if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN)
+ tun_entropy->num_enabling_entries--;
+ else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE &&
+ --tun_entropy->num_disabling_entries == 0)
+ mlx5_set_entropy(tun_entropy, reformat_type, 1);
+ mutex_unlock(&tun_entropy->lock);
+}
+
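+/* Hypothetical usage sketch (for illustration only; example_vxlan_encap_get()
+ * and example_vxlan_encap_put() are made-up names): the expected pairing is
+ * one refcount_inc() before installing an encap rule of a given reformat type
+ * and one refcount_dec() when that rule is removed.
+ */
+static inline int example_vxlan_encap_get(struct mlx5_tun_entropy *tun_entropy)
+{
+	/* Fails if the current entropy configuration forbids VXLAN encap */
+	return mlx5_tun_entropy_refcount_inc(tun_entropy,
+					     MLX5_REFORMAT_TYPE_L2_TO_VXLAN);
+}
+
+static inline void example_vxlan_encap_put(struct mlx5_tun_entropy *tun_entropy)
+{
+	/* Drop the reference once the matching encap rule is removed */
+	mlx5_tun_entropy_refcount_dec(tun_entropy,
+				      MLX5_REFORMAT_TYPE_L2_TO_VXLAN);
+}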
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
new file mode 100644
index 000000000..54c42a887
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_PORT_TUN_H__
+#define __MLX5_PORT_TUN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_tun_entropy {
+ struct mlx5_core_dev *mdev;
+ u32 num_enabling_entries;
+ u32 num_disabling_entries;
+ u8 enabled;
+ struct mutex lock; /* lock the entropy fields */
+};
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+ struct mlx5_core_dev *mdev);
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type);
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type);
+
+#endif /* __MLX5_PORT_TUN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
new file mode 100644
index 000000000..84e568386
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __LIB_MLX5_SF_H__
+#define __LIB_MLX5_SF_H__
+
+#include <linux/mlx5/driver.h>
+
+static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, sf_base_id);
+}
+
+#ifdef CONFIG_MLX5_SF
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, sf);
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+ if (!mlx5_sf_supported(dev))
+ return 0;
+ if (MLX5_CAP_GEN(dev, max_num_sf))
+ return MLX5_CAP_GEN(dev, max_num_sf);
+ else
+ return 1 << MLX5_CAP_GEN(dev, log_max_sf);
+}
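+
+/* Worked example (illustrative): with sf supported, max_num_sf == 0 and
+ * log_max_sf == 8, mlx5_sf_max_functions() returns 1 << 8 = 256; a non-zero
+ * max_num_sf is returned as-is.
+ */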
+
+#else
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+ return false;
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
new file mode 100644
index 000000000..9b8c051cc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+#include "smfs.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec)
+{
+ struct mlx5dr_match_parameters matcher_mask = {};
+
+ matcher_mask.match_buf = (u64 *)&spec->match_criteria;
+ matcher_mask.match_sz = DR_SZ_MATCH_PARAM;
+
+ return mlx5dr_matcher_create(table, priority, spec->match_criteria_enable, &matcher_mask);
+}
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher)
+{
+ mlx5dr_matcher_destroy(matcher);
+}
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft)
+{
+ return mlx5dr_table_get_from_fs_ft(ft);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table)
+{
+ return mlx5dr_action_create_dest_table(table);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id)
+{
+ return mlx5dr_action_create_flow_counter(counter_id);
+}
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action)
+{
+ mlx5dr_action_destroy(action);
+}
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+ size_t num_actions, struct mlx5dr_action *actions[],
+ u32 flow_source)
+{
+ struct mlx5dr_match_parameters value = {};
+
+ value.match_buf = (u64 *)spec->match_value;
+ value.match_sz = DR_SZ_MATCH_PARAM;
+
+ return mlx5dr_rule_create(matcher, &value, num_actions, actions, flow_source);
+}
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule)
+{
+ mlx5dr_rule_destroy(rule);
+}
+
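+/* Hypothetical helper (for illustration only; example_smfs_fwd_rule() is a
+ * made-up name): chains the wrappers above into a single-action rule that
+ * forwards traffic matching @spec from @table to @dest. A real caller must
+ * also keep the matcher and action handles and destroy them after the rule.
+ */
+static struct mlx5dr_rule *
+example_smfs_fwd_rule(struct mlx5dr_table *table, struct mlx5dr_table *dest,
+		      struct mlx5_flow_spec *spec, u32 prio, u32 flow_source)
+{
+	struct mlx5dr_action *actions[1];
+	struct mlx5dr_matcher *matcher;
+	struct mlx5dr_rule *rule;
+
+	matcher = mlx5_smfs_matcher_create(table, prio, spec);
+	if (!matcher)
+		return NULL;
+
+	actions[0] = mlx5_smfs_action_create_dest_table(dest);
+	if (!actions[0]) {
+		mlx5_smfs_matcher_destroy(matcher);
+		return NULL;
+	}
+
+	rule = mlx5_smfs_rule_create(matcher, spec, 1, actions, flow_source);
+	if (!rule) {
+		mlx5_smfs_action_destroy(actions[0]);
+		mlx5_smfs_matcher_destroy(matcher);
+	}
+	return rule;
+}
+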
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
new file mode 100644
index 000000000..452d0df33
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_LIB_SMFS_H__
+#define __MLX5_LIB_SMFS_H__
+
+#include "steering/mlx5dr.h"
+#include "steering/dr_types.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec);
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher);
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id);
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action);
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+ size_t num_actions, struct mlx5dr_action *actions[],
+ u32 flow_source);
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule);
+
+#endif /* __MLX5_LIB_SMFS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
new file mode 100644
index 000000000..696e45e2b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/driver.h>
+#include "lib/tout.h"
+
+struct mlx5_timeouts {
+ u64 to[MAX_TIMEOUT_TYPES];
+};
+
+static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
+ [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000,
+ [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000,
+ [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
+ [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
+ [MLX5_TO_FW_INIT_MS] = 2000,
+ [MLX5_TO_CMD_MS] = 60000,
+ [MLX5_TO_PCI_TOGGLE_MS] = 2000,
+ [MLX5_TO_HEALTH_POLL_INTERVAL_MS] = 2000,
+ [MLX5_TO_FULL_CRDUMP_MS] = 60000,
+ [MLX5_TO_FW_RESET_MS] = 60000,
+ [MLX5_TO_FLUSH_ON_ERROR_MS] = 2000,
+ [MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000,
+ [MLX5_TO_TEARDOWN_MS] = 3000,
+ [MLX5_TO_FSM_REACTIVATE_MS] = 5000,
+ [MLX5_TO_RECLAIM_PAGES_MS] = 5000,
+ [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000
+};
+
+static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type)
+{
+ dev->timeouts->to[type] = val;
+}
+
+int mlx5_tout_init(struct mlx5_core_dev *dev)
+{
+ int i;
+
+ dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL);
+ if (!dev->timeouts)
+ return -ENOMEM;
+
+ for (i = 0; i < MAX_TIMEOUT_TYPES; i++)
+ tout_set(dev, tout_def_sw_val[i], i);
+
+ return 0;
+}
+
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev)
+{
+ kfree(dev->timeouts);
+}
+
+/* The time register consists of two fields: to_multiplier (timeout
+ * multiplier) and to_value (timeout value). to_value is the number of time
+ * units, and to_multiplier selects the unit and must be one of these four
+ * values:
+ * 0x0: milliseconds
+ * 0x1: seconds
+ * 0x2: minutes
+ * 0x3: hours
+ * This function converts the time stored in the two register fields into
+ * milliseconds.
+ */
+static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val)
+{
+ u64 msec = to_val;
+
+ to_mul &= 0x3;
+	/* convert hours/minutes/seconds to milliseconds */
+ if (to_mul)
+ msec *= 1000 * int_pow(60, to_mul - 1);
+
+ return msec;
+}
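+
+/* Worked example (illustrative): to_multiplier = 0x2 (minutes) and
+ * to_value = 3 yield 3 * 1000 * int_pow(60, 1) = 180000, i.e. 3 minutes
+ * expressed in milliseconds.
+ */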
+
+static u64 tout_convert_iseg_to_ms(u32 iseg_to)
+{
+ return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff);
+}
+
+static bool tout_is_supported(struct mlx5_core_dev *dev)
+{
+ return !!ioread32be(&dev->iseg->cmd_q_init_to);
+}
+
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev)
+{
+ u32 to;
+
+ if (!tout_is_supported(dev))
+ return;
+
+ to = ioread32be(&dev->iseg->cmd_q_init_to);
+ tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS);
+
+ to = ioread32be(&dev->iseg->cmd_exec_to);
+ tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS);
+}
+
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type)
+{
+ return dev->timeouts->to[type];
+}
+
+#define MLX5_TIMEOUT_QUERY(fld, reg_out) \
+ ({ \
+ struct mlx5_ifc_default_timeout_bits *time_field; \
+ u32 to_multi, to_value; \
+ u64 to_val_ms; \
+ \
+ time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \
+ to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \
+ to_value = MLX5_GET(default_timeout, time_field, to_value); \
+ to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \
+ to_val_ms; \
+ })
+
+#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \
+ ({ \
+ u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \
+ tout_set(dev, fw_to + (to_extra), to_type); \
+ fw_to; \
+ })
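+
+/* For example (illustrative), the call below in tout_query_dtor()
+ *   MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val);
+ * stores the firmware-provided fw_reset timeout plus the previously queried
+ * PCIe toggle time as the effective MLX5_TO_FW_RESET_MS value.
+ */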
+
+static int tout_query_dtor(struct mlx5_core_dev *dev)
+{
+ u64 pcie_toggle_to_val, tear_down_to_val;
+ u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0);
+ if (err)
+ return err;
+
+ pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0);
+ MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val);
+
+ tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0);
+ MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS,
+ tear_down_to_val);
+
+ MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0);
+ MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0);
+ MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0);
+ MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0);
+ MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0);
+ MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0);
+
+ return 0;
+}
+
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev)
+{
+ if (tout_is_supported(dev))
+ return tout_query_dtor(dev);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
new file mode 100644
index 000000000..bc9e9aeda
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef MLX5_TIMEOUTS_H
+#define MLX5_TIMEOUTS_H
+
+enum mlx5_timeouts_types {
+ /* pre init timeouts (not read from FW) */
+ MLX5_TO_FW_PRE_INIT_TIMEOUT_MS,
+ MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS,
+ MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS,
+ MLX5_TO_FW_PRE_INIT_WAIT_MS,
+
+ /* init segment timeouts */
+ MLX5_TO_FW_INIT_MS,
+ MLX5_TO_CMD_MS,
+
+ /* DTOR timeouts */
+ MLX5_TO_PCI_TOGGLE_MS,
+ MLX5_TO_HEALTH_POLL_INTERVAL_MS,
+ MLX5_TO_FULL_CRDUMP_MS,
+ MLX5_TO_FW_RESET_MS,
+ MLX5_TO_FLUSH_ON_ERROR_MS,
+ MLX5_TO_PCI_SYNC_UPDATE_MS,
+ MLX5_TO_TEARDOWN_MS,
+ MLX5_TO_FSM_REACTIVATE_MS,
+ MLX5_TO_RECLAIM_PAGES_MS,
+ MLX5_TO_RECLAIM_VFS_PAGES_MS,
+
+ MAX_TIMEOUT_TYPES
+};
+
+struct mlx5_core_dev;
+int mlx5_tout_init(struct mlx5_core_dev *dev);
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
+
+#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
+
+#endif /* MLX5_TIMEOUTS_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
new file mode 100644
index 000000000..d55e15c1f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <linux/mlx5/driver.h>
+#include <net/vxlan.h>
+#include "mlx5_core.h"
+#include "vxlan.h"
+
+struct mlx5_vxlan {
+ struct mlx5_core_dev *mdev;
+	/* max_num_ports is usually 4; 16 buckets are more than enough */
+ DECLARE_HASHTABLE(htable, 4);
+ struct mutex sync_lock; /* sync add/del port HW operations */
+};
+
+struct mlx5_vxlan_port {
+ struct hlist_node hlist;
+ u16 udp_port;
+};
+
+static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+ u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {};
+
+ MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
+ MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
+ MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+ return mlx5_cmd_exec_in(mdev, add_vxlan_udp_dport, in);
+}
+
+static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {};
+
+ MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
+ MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+ MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+ return mlx5_cmd_exec_in(mdev, delete_vxlan_udp_dport, in);
+}
+
+bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ bool found = false;
+
+ if (!mlx5_vxlan_allowed(vxlan))
+		return false;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(vxlan->htable, vxlanp, hlist, port)
+ if (vxlanp->udp_port == port) {
+ found = true;
+ break;
+ }
+ rcu_read_unlock();
+
+ return found;
+}
+
+static struct mlx5_vxlan_port *vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+
+ hash_for_each_possible(vxlan->htable, vxlanp, hlist, port)
+ if (vxlanp->udp_port == port)
+ return vxlanp;
+ return NULL;
+}
+
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ int ret;
+
+ vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
+ if (!vxlanp)
+ return -ENOMEM;
+ vxlanp->udp_port = port;
+
+ ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+ if (ret) {
+ kfree(vxlanp);
+ return ret;
+ }
+
+ mutex_lock(&vxlan->sync_lock);
+ hash_add_rcu(vxlan->htable, &vxlanp->hlist, port);
+ mutex_unlock(&vxlan->sync_lock);
+
+ return 0;
+}
+
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ int ret = 0;
+
+ mutex_lock(&vxlan->sync_lock);
+
+ vxlanp = vxlan_lookup_port(vxlan, port);
+ if (WARN_ON(!vxlanp)) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+
+ hash_del_rcu(&vxlanp->hlist);
+ synchronize_rcu();
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+ kfree(vxlanp);
+
+out_unlock:
+ mutex_unlock(&vxlan->sync_lock);
+ return ret;
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_vxlan *vxlan;
+
+ if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
+ return ERR_PTR(-ENOTSUPP);
+
+ vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
+ if (!vxlan)
+ return ERR_PTR(-ENOMEM);
+
+ vxlan->mdev = mdev;
+ mutex_init(&vxlan->sync_lock);
+ hash_init(vxlan->htable);
+
+ /* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */
+ mlx5_vxlan_add_port(vxlan, IANA_VXLAN_UDP_PORT);
+
+ return vxlan;
+}
+
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+ if (!mlx5_vxlan_allowed(vxlan))
+ return;
+
+ mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT);
+ WARN_ON(!hash_empty(vxlan->htable));
+
+ kfree(vxlan);
+}
+
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mlx5_vxlan_allowed(vxlan))
+ return;
+
+ hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
+		/* Don't delete the default UDP port added by the HW.
+		 * Remove only user-configured ports.
+		 */
+ if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT)
+ continue;
+ mlx5_vxlan_del_port(vxlan, vxlanp->udp_port);
+ }
+}
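+
+/* Hypothetical caller sketch (for illustration only;
+ * example_offload_vxlan_port() is a made-up name): offload a user-configured
+ * VXLAN UDP port, skipping ports the device already knows about.
+ */
+static inline int example_offload_vxlan_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	if (!mlx5_vxlan_allowed(vxlan))
+		return -EOPNOTSUPP;
+	if (mlx5_vxlan_lookup_port(vxlan, port))
+		return 0;	/* already offloaded */
+	return mlx5_vxlan_add_port(vxlan, port);
+}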
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
new file mode 100644
index 000000000..34ef662da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_VXLAN_H__
+#define __MLX5_VXLAN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vxlan;
+struct mlx5_vxlan_port;
+
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
+static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
+{
+	/* The not-allowed reason is encoded as an error in the vxlan pointer
+	 * by mlx5_vxlan_create()
+	 */
+ return !IS_ERR_OR_NULL(vxlan);
+}
+
+#if IS_ENABLED(CONFIG_VXLAN)
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
+bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan);
+#else
+static inline struct mlx5_vxlan*
+mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
+static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; }
+static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; }
+#endif
+
+#endif /* __MLX5_VXLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
new file mode 100644
index 000000000..6ab0642e9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -0,0 +1,2130 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/debugfs.h>
+#include <linux/kmod.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+#include <linux/version.h>
+#include <net/devlink.h>
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "fs_core.h"
+#include "lib/mpfs.h"
+#include "eswitch.h"
+#include "devlink.h"
+#include "fw_reset.h"
+#include "lib/mlx5.h"
+#include "lib/tout.h"
+#include "fpga/core.h"
+#include "en_accel/ipsec.h"
+#include "lib/clock.h"
+#include "lib/vxlan.h"
+#include "lib/geneve.h"
+#include "lib/devcom.h"
+#include "lib/pci_vsc.h"
+#include "diag/fw_tracer.h"
+#include "ecpf.h"
+#include "lib/hv_vhca.h"
+#include "diag/rsc_dump.h"
+#include "sf/vhca_event.h"
+#include "sf/dev/dev.h"
+#include "sf/sf.h"
+#include "mlx5_irq.h"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
+MODULE_LICENSE("Dual BSD/GPL");
+
+unsigned int mlx5_core_debug_mask;
+module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
+MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
+
+static unsigned int prof_sel = MLX5_DEFAULT_PROF;
+module_param_named(prof_sel, prof_sel, uint, 0444);
+MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+
+static u32 sw_owner_id[4];
+#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1)
+static DEFINE_IDA(sw_vhca_ida);
+
+enum {
+ MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
+#define LOG_MAX_SUPPORTED_QPS 0xff
+
+static struct mlx5_profile profile[] = {
+ [0] = {
+ .mask = 0,
+ },
+ [1] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE,
+ .log_max_qp = 12,
+ },
+ [2] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE |
+ MLX5_PROF_MASK_MR_CACHE,
+ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
+ .mr_cache[0] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[1] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[2] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[3] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[4] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[5] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[6] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[7] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[8] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[9] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[10] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[11] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[12] = {
+ .size = 64,
+ .limit = 32
+ },
+ .mr_cache[13] = {
+ .size = 32,
+ .limit = 16
+ },
+ .mr_cache[14] = {
+ .size = 16,
+ .limit = 8
+ },
+ .mr_cache[15] = {
+ .size = 8,
+ .limit = 4
+ },
+ },
+};
+
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
+ u32 warn_time_mili)
+{
+ unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
+ unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
+ u32 fw_initializing;
+ int err = 0;
+
+ do {
+ fw_initializing = ioread32be(&dev->iseg->initializing);
+ if (!(fw_initializing >> 31))
+ break;
+ if (time_after(jiffies, end) ||
+ test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
+ err = -EBUSY;
+ break;
+ }
+ if (warn_time_mili && time_after(jiffies, warn)) {
+ mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n",
+ jiffies_to_msecs(end - warn) / 1000, fw_initializing);
+ warn = jiffies + msecs_to_jiffies(warn_time_mili);
+ }
+ msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT));
+ } while (true);
+
+ return err;
+}
+
+static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
+{
+ int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
+ driver_version);
+ u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
+ int remaining_size = driver_ver_sz;
+ char *string;
+
+ if (!MLX5_CAP_GEN(dev, driver_version))
+ return;
+
+ string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
+
+ strncpy(string, "Linux", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, ",", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, KBUILD_MODNAME, remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, ",", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+
+ snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
+ LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
+ LINUX_VERSION_SUBLEVEL);
+
+	/* Send the command */
+ MLX5_SET(set_driver_version_in, in, opcode,
+ MLX5_CMD_OP_SET_DRIVER_VERSION);
+
+ mlx5_cmd_exec_in(dev, set_driver_version, in);
+}
+
+static int set_dma_caps(struct pci_dev *pdev)
+{
+ int err;
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
+ return err;
+ }
+ }
+
+ dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
+ return err;
+}
+
+static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err = 0;
+
+ mutex_lock(&dev->pci_status_mutex);
+ if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
+ err = pci_enable_device(pdev);
+ if (!err)
+ dev->pci_status = MLX5_PCI_STATUS_ENABLED;
+ }
+ mutex_unlock(&dev->pci_status_mutex);
+
+ return err;
+}
+
+static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+
+ mutex_lock(&dev->pci_status_mutex);
+ if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
+ pci_disable_device(pdev);
+ dev->pci_status = MLX5_PCI_STATUS_DISABLED;
+ }
+ mutex_unlock(&dev->pci_status_mutex);
+}
+
+static int request_bar(struct pci_dev *pdev)
+{
+ int err = 0;
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
+ return -ENODEV;
+ }
+
+ err = pci_request_regions(pdev, KBUILD_MODNAME);
+ if (err)
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
+
+ return err;
+}
+
+static void release_bar(struct pci_dev *pdev)
+{
+ pci_release_regions(pdev);
+}
+
+struct mlx5_reg_host_endianness {
+ u8 he;
+ u8 rsvd[15];
+};
+
+static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
+{
+ switch (size) {
+ case 128:
+ return 0;
+ case 256:
+ return 1;
+ case 512:
+ return 2;
+ case 1024:
+ return 3;
+ case 2048:
+ return 4;
+ case 4096:
+ return 5;
+ default:
+ mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
+ return 0;
+ }
+}
+
+static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
+ enum mlx5_cap_type cap_type,
+ enum mlx5_cap_mode cap_mode)
+{
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *out, *hca_caps;
+ u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
+ int err;
+
+ memset(in, 0, sizeof(in));
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+ if (err) {
+ mlx5_core_warn(dev,
+ "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
+ cap_type, cap_mode, err);
+ goto query_ex;
+ }
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+
+ switch (cap_mode) {
+ case HCA_CAP_OPMOD_GET_MAX:
+ memcpy(dev->caps.hca[cap_type]->max, hca_caps,
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ break;
+ case HCA_CAP_OPMOD_GET_CUR:
+ memcpy(dev->caps.hca[cap_type]->cur, hca_caps,
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ break;
+ default:
+ mlx5_core_warn(dev,
+ "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
+ cap_type, cap_mode);
+ err = -EINVAL;
+ break;
+ }
+query_ex:
+ kfree(out);
+ return err;
+}
+
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
+{
+ int ret;
+
+ ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
+ if (ret)
+ return ret;
+ return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
+}
+
+static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
+{
+ MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
+ return mlx5_cmd_exec_in(dev, set_hca_cap, in);
+}
+
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ void *set_hca_cap;
+ int req_endianness;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, atomic))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+ if (err)
+ return err;
+
+ req_endianness =
+ MLX5_CAP_ATOMIC(dev,
+ supported_atomic_req_8B_endianness_mode_1);
+
+ if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+ /* Set requestor to host endianness */
+ MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+ return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+}
+
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ void *set_hca_cap;
+ bool do_set = false;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
+ !MLX5_CAP_GEN(dev, pg))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+ if (err)
+ return err;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
+ MLX5_ST_SZ_BYTES(odp_cap));
+
+#define ODP_CAP_SET_MAX(dev, field) \
+ do { \
+ u32 _res = MLX5_CAP_ODP_MAX(dev, field); \
+ if (_res) { \
+ do_set = true; \
+ MLX5_SET(odp_cap, set_hca_cap, field, _res); \
+ } \
+ } while (0)
+
+ ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
+ ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
+ ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
+ ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
+ ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
+ ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
+
+ if (!do_set)
+ return 0;
+
+ return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+}
+
+static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ &val);
+ if (!err)
+ return val.vu32;
+ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
+ return err;
+}
+
+bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ union devlink_param_value val;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ &val);
+
+ if (!err)
+ return val.vbool;
+
+ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
+ return MLX5_CAP_GEN(dev, roce);
+}
+EXPORT_SYMBOL(mlx5_is_roce_on);
+
+static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
+ if (err)
+ return err;
+
+ if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) ||
+ !(dev->priv.sw_vhca_id > 0))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
+ capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur,
+ MLX5_ST_SZ_BYTES(cmd_hca_cap_2));
+ MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1);
+
+ return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2);
+}
+
+static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ struct mlx5_profile *prof = &dev->profile;
+ void *set_hca_cap;
+ int max_uc_list;
+ int err;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
+ if (err)
+ return err;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
+ capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur,
+ MLX5_ST_SZ_BYTES(cmd_hca_cap));
+
+ mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
+ mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
+ 128);
+ /* we limit the size of the pkey table to 128 entries for now */
+ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
+ to_fw_pkey_sz(dev, 128));
+
+ /* Check log_max_qp from HCA caps to set in current profile */
+ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
+ prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp));
+ } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
+ mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
+ prof->log_max_qp,
+ MLX5_CAP_GEN_MAX(dev, log_max_qp));
+ prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+ }
+ if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
+ prof->log_max_qp);
+
+ /* disable cmdif checksum */
+ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
+
+ /* Enable 4K UAR only when HCA supports it and page size is bigger
+ * than 4K.
+ */
+ if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
+
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
+
+ if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
+ MLX5_SET(cmd_hca_cap,
+ set_hca_cap,
+ cache_line_128byte,
+ cache_line_size() >= 128 ? 1 : 0);
+
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
+
+ if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1);
+
+ if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
+ MLX5_SET(cmd_hca_cap,
+ set_hca_cap,
+ num_vhca_ports,
+ MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
+
+ if (MLX5_CAP_GEN_MAX(dev, release_all_pages))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1);
+
+ if (MLX5_CAP_GEN_MAX(dev, mkey_by_name))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);
+
+ mlx5_vhca_state_cap_handle(dev, set_hca_cap);
+
+ if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
+ MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
+
+ if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
+ mlx5_is_roce_on(dev));
+
+ max_uc_list = max_uc_list_get_devlink_param(dev);
+ if (max_uc_list > 0)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list,
+ ilog2(max_uc_list));
+
+ return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+}
+
+/* The cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the
+ * boot process.
+ * If the RoCE cap is writable in FW and the user/devlink requested to change
+ * it, we have yet to query its final state. Hence the need for this function.
+ *
+ * Returns true if:
+ * 1) the RoCE cap is read-only in FW and already disabled, or
+ * 2) the RoCE cap is writable in FW and the user/devlink requested it off.
+ *
+ * In any other case, returns false.
+ */
+static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
+{
+ return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
+ (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
+}
+
+static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (is_roce_fw_disabled(dev))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) ||
+ !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur,
+ MLX5_ST_SZ_BYTES(roce_cap));
+ MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
+
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
+ return err;
+}
+
+static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev,
+ void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, port_selection_cap))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) ||
+ !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur,
+ MLX5_ST_SZ_BYTES(port_selection_cap));
+ MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1);
+
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION);
+
+ return err;
+}
+
+static int set_hca_cap(struct mlx5_core_dev *dev)
+{
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_ctx;
+ int err;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ err = handle_hca_cap(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_atomic(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_odp(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_roce(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_roce failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_2(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_2 failed\n");
+ goto out;
+ }
+
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_port_selection(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n");
+ goto out;
+ }
+
+out:
+ kfree(set_ctx);
+ return err;
+}
+
+static int set_hca_ctrl(struct mlx5_core_dev *dev)
+{
+ struct mlx5_reg_host_endianness he_in;
+ struct mlx5_reg_host_endianness he_out;
+ int err;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ memset(&he_in, 0, sizeof(he_in));
+ he_in.he = MLX5_SET_HOST_ENDIANNESS;
+ err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
+ &he_out, sizeof(he_out),
+ MLX5_REG_HOST_ENDIANNESS, 0, 1);
+ return err;
+}
+
+static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
+{
+ int ret = 0;
+
+ /* Disable local_lb by default */
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ ret = mlx5_nic_vport_update_local_lb(dev, false);
+
+ return ret;
+}
+
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
+
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, func_id);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function,
+ dev->caps.embedded_cpu);
+ return mlx5_cmd_exec_in(dev, enable_hca, in);
+}
+
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
+
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, func_id);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function,
+ dev->caps.embedded_cpu);
+ return mlx5_cmd_exec_in(dev, disable_hca, in);
+}
+
+static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
+{
+ u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
+ u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {};
+ u32 sup_issi;
+ int err;
+
+ MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
+ err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out);
+ if (err) {
+ u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome);
+ u8 status = MLX5_GET(query_issi_out, query_out, status);
+
+ if (!status || syndrome == MLX5_DRIVER_SYND) {
+ mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
+ err, status, syndrome);
+ return err;
+ }
+
+ mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
+ dev->issi = 0;
+ return 0;
+ }
+
+ sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
+
+ if (sup_issi & (1 << 1)) {
+ u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {};
+
+ MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
+ MLX5_SET(set_issi_in, set_in, current_issi, 1);
+ err = mlx5_cmd_exec_in(dev, set_issi, set_in);
+ if (err) {
+ mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
+ err);
+ return err;
+ }
+
+ dev->issi = 1;
+
+ return 0;
+ } else if (sup_issi & (1 << 0) || !sup_issi) {
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int err = 0;
+
+ mutex_init(&dev->pci_status_mutex);
+ pci_set_drvdata(dev->pdev, dev);
+
+ dev->bar_addr = pci_resource_start(pdev, 0);
+
+ err = mlx5_pci_enable_device(dev);
+ if (err) {
+ mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
+ return err;
+ }
+
+ err = request_bar(pdev);
+ if (err) {
+ mlx5_core_err(dev, "error requesting BARs, aborting\n");
+ goto err_disable;
+ }
+
+ pci_set_master(pdev);
+
+ err = set_dma_caps(pdev);
+ if (err) {
+ mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
+ goto err_clr_master;
+ }
+
+ if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+ mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
+
+ dev->iseg_base = dev->bar_addr;
+ dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
+ if (!dev->iseg) {
+ err = -ENOMEM;
+ mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
+ goto err_clr_master;
+ }
+
+ mlx5_pci_vsc_init(dev);
+ return 0;
+
+err_clr_master:
+ pci_clear_master(dev->pdev);
+ release_bar(dev->pdev);
+err_disable:
+ mlx5_pci_disable_device(dev);
+ return err;
+}
+
+static void mlx5_pci_close(struct mlx5_core_dev *dev)
+{
+	/* Health work might still be active, and it needs the PCI BAR in
+	 * order to know the NIC state. Therefore, drain the health WQ
+	 * before removing the PCI BARs.
+	 */
+ mlx5_drain_health_wq(dev);
+ iounmap(dev->iseg);
+ pci_clear_master(dev->pdev);
+ release_bar(dev->pdev);
+ mlx5_pci_disable_device(dev);
+}
+
+static int mlx5_init_once(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ dev->priv.devcom = mlx5_devcom_register_device(dev);
+ if (IS_ERR(dev->priv.devcom))
+ mlx5_core_err(dev, "failed to register with devcom (0x%p)\n",
+ dev->priv.devcom);
+
+ err = mlx5_query_board_id(dev);
+ if (err) {
+ mlx5_core_err(dev, "query board id failed\n");
+ goto err_devcom;
+ }
+
+ err = mlx5_irq_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize irq table\n");
+ goto err_devcom;
+ }
+
+ err = mlx5_eq_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize eq\n");
+ goto err_irq_cleanup;
+ }
+
+ err = mlx5_events_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize events\n");
+ goto err_eq_cleanup;
+ }
+
+ err = mlx5_fw_reset_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize fw reset events\n");
+ goto err_events_cleanup;
+ }
+
+ mlx5_cq_debugfs_init(dev);
+
+ mlx5_init_reserved_gids(dev);
+
+ mlx5_init_clock(dev);
+
+ dev->vxlan = mlx5_vxlan_create(dev);
+ dev->geneve = mlx5_geneve_create(dev);
+
+ err = mlx5_init_rl_table(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init rate limiting\n");
+ goto err_tables_cleanup;
+ }
+
+ err = mlx5_mpfs_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
+ goto err_rl_cleanup;
+ }
+
+ err = mlx5_sriov_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init sriov %d\n", err);
+ goto err_mpfs_cleanup;
+ }
+
+ err = mlx5_eswitch_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+ goto err_sriov_cleanup;
+ }
+
+ err = mlx5_fpga_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
+ goto err_eswitch_cleanup;
+ }
+
+ err = mlx5_vhca_event_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err);
+ goto err_fpga_cleanup;
+ }
+
+ err = mlx5_sf_hw_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init SF HW table %d\n", err);
+ goto err_sf_hw_table_cleanup;
+ }
+
+ err = mlx5_sf_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init SF table %d\n", err);
+ goto err_sf_table_cleanup;
+ }
+
+ err = mlx5_fs_core_alloc(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc flow steering\n");
+ goto err_fs;
+ }
+
+ dev->dm = mlx5_dm_create(dev);
+ if (IS_ERR(dev->dm))
+ mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));
+
+ dev->tracer = mlx5_fw_tracer_create(dev);
+ dev->hv_vhca = mlx5_hv_vhca_create(dev);
+ dev->rsc_dump = mlx5_rsc_dump_create(dev);
+
+ return 0;
+
+err_fs:
+ mlx5_sf_table_cleanup(dev);
+err_sf_table_cleanup:
+ mlx5_sf_hw_table_cleanup(dev);
+err_sf_hw_table_cleanup:
+ mlx5_vhca_event_cleanup(dev);
+err_fpga_cleanup:
+ mlx5_fpga_cleanup(dev);
+err_eswitch_cleanup:
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_sriov_cleanup:
+ mlx5_sriov_cleanup(dev);
+err_mpfs_cleanup:
+ mlx5_mpfs_cleanup(dev);
+err_rl_cleanup:
+ mlx5_cleanup_rl_table(dev);
+err_tables_cleanup:
+ mlx5_geneve_destroy(dev->geneve);
+ mlx5_vxlan_destroy(dev->vxlan);
+ mlx5_cleanup_clock(dev);
+ mlx5_cleanup_reserved_gids(dev);
+ mlx5_cq_debugfs_cleanup(dev);
+ mlx5_fw_reset_cleanup(dev);
+err_events_cleanup:
+ mlx5_events_cleanup(dev);
+err_eq_cleanup:
+ mlx5_eq_table_cleanup(dev);
+err_irq_cleanup:
+ mlx5_irq_table_cleanup(dev);
+err_devcom:
+ mlx5_devcom_unregister_device(dev->priv.devcom);
+
+ return err;
+}
+
+static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
+{
+ mlx5_rsc_dump_destroy(dev);
+ mlx5_hv_vhca_destroy(dev->hv_vhca);
+ mlx5_fw_tracer_destroy(dev->tracer);
+ mlx5_dm_cleanup(dev);
+ mlx5_fs_core_free(dev);
+ mlx5_sf_table_cleanup(dev);
+ mlx5_sf_hw_table_cleanup(dev);
+ mlx5_vhca_event_cleanup(dev);
+ mlx5_fpga_cleanup(dev);
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+ mlx5_sriov_cleanup(dev);
+ mlx5_mpfs_cleanup(dev);
+ mlx5_cleanup_rl_table(dev);
+ mlx5_geneve_destroy(dev->geneve);
+ mlx5_vxlan_destroy(dev->vxlan);
+ mlx5_cleanup_clock(dev);
+ mlx5_cleanup_reserved_gids(dev);
+ mlx5_cq_debugfs_cleanup(dev);
+ mlx5_fw_reset_cleanup(dev);
+ mlx5_events_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
+ mlx5_irq_table_cleanup(dev);
+ mlx5_devcom_unregister_device(dev->priv.devcom);
+}
+
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout)
+{
+ int err;
+
+ mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
+
+ /* Only PFs hold the relevant PCIe information for this query */
+ if (mlx5_core_is_pf(dev))
+ pcie_print_link_status(dev->pdev);
+
+	/* Wait for firmware to accept initialization segment configurations.
+	 */
+ err = wait_fw_init(dev, timeout,
+ mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
+ if (err) {
+ mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
+ timeout);
+ return err;
+ }
+
+ err = mlx5_cmd_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
+ return err;
+ }
+
+ mlx5_tout_query_iseg(dev);
+
+ err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0);
+ if (err) {
+ mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n",
+ mlx5_tout_ms(dev, FW_INIT));
+ goto err_cmd_cleanup;
+ }
+
+ dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
+ mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
+
+ mlx5_start_health_poll(dev);
+
+ err = mlx5_core_enable_hca(dev, 0);
+ if (err) {
+ mlx5_core_err(dev, "enable hca failed\n");
+ goto stop_health_poll;
+ }
+
+ err = mlx5_core_set_issi(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to set issi\n");
+ goto err_disable_hca;
+ }
+
+ err = mlx5_satisfy_startup_pages(dev, 1);
+ if (err) {
+ mlx5_core_err(dev, "failed to allocate boot pages\n");
+ goto err_disable_hca;
+ }
+
+ err = mlx5_tout_query_dtor(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to read dtor\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = set_hca_ctrl(dev);
+ if (err) {
+ mlx5_core_err(dev, "set_hca_ctrl failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = set_hca_cap(dev);
+ if (err) {
+ mlx5_core_err(dev, "set_hca_cap failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_satisfy_startup_pages(dev, 0);
+ if (err) {
+ mlx5_core_err(dev, "failed to allocate init pages\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_cmd_init_hca(dev, sw_owner_id);
+ if (err) {
+ mlx5_core_err(dev, "init hca failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ mlx5_set_driver_version(dev);
+
+ err = mlx5_query_hca_caps(dev);
+ if (err) {
+ mlx5_core_err(dev, "query hca failed\n");
+ goto reclaim_boot_pages;
+ }
+ mlx5_start_health_fw_log_up(dev);
+
+ return 0;
+
+reclaim_boot_pages:
+ mlx5_reclaim_startup_pages(dev);
+err_disable_hca:
+ mlx5_core_disable_hca(dev, 0);
+stop_health_poll:
+ mlx5_stop_health_poll(dev, boot);
+err_cmd_cleanup:
+ mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
+ mlx5_cmd_cleanup(dev);
+
+ return err;
+}
+
+static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
+{
+ int err;
+
+ err = mlx5_cmd_teardown_hca(dev);
+ if (err) {
+ mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
+ return err;
+ }
+ mlx5_reclaim_startup_pages(dev);
+ mlx5_core_disable_hca(dev, 0);
+ mlx5_stop_health_poll(dev, boot);
+ mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
+ mlx5_cmd_cleanup(dev);
+
+ return 0;
+}
+
+static int mlx5_load(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ dev->priv.uar = mlx5_get_uars_page(dev);
+ if (IS_ERR(dev->priv.uar)) {
+ mlx5_core_err(dev, "Failed allocating uar, aborting\n");
+ err = PTR_ERR(dev->priv.uar);
+ return err;
+ }
+
+ mlx5_events_start(dev);
+ mlx5_pagealloc_start(dev);
+
+ err = mlx5_irq_table_create(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc IRQs\n");
+ goto err_irq_table;
+ }
+
+ err = mlx5_eq_table_create(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create EQs\n");
+ goto err_eq_table;
+ }
+
+ err = mlx5_fw_tracer_init(dev->tracer);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
+ mlx5_fw_tracer_destroy(dev->tracer);
+ dev->tracer = NULL;
+ }
+
+ mlx5_fw_reset_events_start(dev);
+ mlx5_hv_vhca_init(dev->hv_vhca);
+
+ err = mlx5_rsc_dump_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init Resource dump %d\n", err);
+ mlx5_rsc_dump_destroy(dev);
+ dev->rsc_dump = NULL;
+ }
+
+ err = mlx5_fpga_device_start(dev);
+ if (err) {
+ mlx5_core_err(dev, "fpga device start failed %d\n", err);
+ goto err_fpga_start;
+ }
+
+ err = mlx5_fs_core_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init flow steering\n");
+ goto err_fs;
+ }
+
+ err = mlx5_core_set_hca_defaults(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to set hca defaults\n");
+ goto err_set_hca;
+ }
+
+ mlx5_vhca_event_start(dev);
+
+ err = mlx5_sf_hw_table_create(dev);
+ if (err) {
+ mlx5_core_err(dev, "sf table create failed %d\n", err);
+ goto err_vhca;
+ }
+
+ err = mlx5_ec_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to init embedded CPU\n");
+ goto err_ec;
+ }
+
+ mlx5_lag_add_mdev(dev);
+ err = mlx5_sriov_attach(dev);
+ if (err) {
+ mlx5_core_err(dev, "sriov init failed %d\n", err);
+ goto err_sriov;
+ }
+
+ mlx5_sf_dev_table_create(dev);
+
+ return 0;
+
+err_sriov:
+ mlx5_lag_remove_mdev(dev);
+ mlx5_ec_cleanup(dev);
+err_ec:
+ mlx5_sf_hw_table_destroy(dev);
+err_vhca:
+ mlx5_vhca_event_stop(dev);
+err_set_hca:
+ mlx5_fs_core_cleanup(dev);
+err_fs:
+ mlx5_fpga_device_stop(dev);
+err_fpga_start:
+ mlx5_rsc_dump_cleanup(dev);
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
+ mlx5_fw_reset_events_stop(dev);
+ mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_eq_table_destroy(dev);
+err_eq_table:
+ mlx5_irq_table_destroy(dev);
+err_irq_table:
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
+ mlx5_put_uars_page(dev, dev->priv.uar);
+ return err;
+}
+
+static void mlx5_unload(struct mlx5_core_dev *dev)
+{
+ mlx5_sf_dev_table_destroy(dev);
+ mlx5_eswitch_disable(dev->priv.eswitch);
+ mlx5_sriov_detach(dev);
+ mlx5_lag_remove_mdev(dev);
+ mlx5_ec_cleanup(dev);
+ mlx5_sf_hw_table_destroy(dev);
+ mlx5_vhca_event_stop(dev);
+ mlx5_fs_core_cleanup(dev);
+ mlx5_fpga_device_stop(dev);
+ mlx5_rsc_dump_cleanup(dev);
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
+ mlx5_fw_reset_events_stop(dev);
+ mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_eq_table_destroy(dev);
+ mlx5_irq_table_destroy(dev);
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
+ mlx5_put_uars_page(dev, dev->priv.uar);
+}
+
+int mlx5_init_one(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ int err = 0;
+
+ devl_lock(devlink);
+ mutex_lock(&dev->intf_state_mutex);
+ dev->state = MLX5_DEVICE_STATE_UP;
+
+ err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
+ if (err)
+ goto err_function;
+
+ err = mlx5_init_once(dev);
+ if (err) {
+ mlx5_core_err(dev, "sw objs init failed\n");
+ goto function_teardown;
+ }
+
+ err = mlx5_load(dev);
+ if (err)
+ goto err_load;
+
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+
+ err = mlx5_devlink_register(priv_to_devlink(dev));
+ if (err)
+ goto err_devlink_reg;
+
+ err = mlx5_register_device(dev);
+ if (err)
+ goto err_register;
+
+ mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
+ return 0;
+
+err_register:
+ mlx5_devlink_unregister(priv_to_devlink(dev));
+err_devlink_reg:
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+ mlx5_unload(dev);
+err_load:
+ mlx5_cleanup_once(dev);
+function_teardown:
+ mlx5_function_teardown(dev, true);
+err_function:
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
+ return err;
+}
+
+void mlx5_uninit_one(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ devl_lock(devlink);
+ mutex_lock(&dev->intf_state_mutex);
+
+ mlx5_unregister_device(dev);
+ mlx5_devlink_unregister(priv_to_devlink(dev));
+
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+ __func__);
+ mlx5_cleanup_once(dev);
+ goto out;
+ }
+
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+ mlx5_unload(dev);
+ mlx5_cleanup_once(dev);
+ mlx5_function_teardown(dev, true);
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+ devl_unlock(devlink);
+}
+
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
+{
+ int err = 0;
+ u64 timeout;
+
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_warn(dev, "interface is up, NOP\n");
+ goto out;
+ }
+ /* remove any previous indication of internal error */
+ dev->state = MLX5_DEVICE_STATE_UP;
+
+ if (recovery)
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
+ else
+ timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
+ err = mlx5_function_setup(dev, false, timeout);
+ if (err)
+ goto err_function;
+
+ err = mlx5_load(dev);
+ if (err)
+ goto err_load;
+
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+
+ err = mlx5_attach_device(dev);
+ if (err)
+ goto err_attach;
+
+ mutex_unlock(&dev->intf_state_mutex);
+ return 0;
+
+err_attach:
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+ mlx5_unload(dev);
+err_load:
+ mlx5_function_teardown(dev, false);
+err_function:
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+ return err;
+}
+
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ int ret;
+
+ devl_lock(devlink);
+ ret = mlx5_load_one_devl_locked(dev, recovery);
+ devl_unlock(devlink);
+ return ret;
+}
+
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend)
+{
+ devl_assert_locked(priv_to_devlink(dev));
+ mutex_lock(&dev->intf_state_mutex);
+
+ mlx5_detach_device(dev, suspend);
+
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+ __func__);
+ goto out;
+ }
+
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+ mlx5_unload(dev);
+ mlx5_function_teardown(dev, false);
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+}
+
+void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ devl_lock(devlink);
+ mlx5_unload_one_devl_locked(dev, suspend);
+ devl_unlock(devlink);
+}
+
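+/* HCA capability types cached per device: mlx5_hca_caps_alloc() allocates
+ * one mlx5_hca_cap entry in dev->caps.hca[] for each type listed here and
+ * mlx5_hca_caps_free() releases them.
+ */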
+static const int types[] = {
+ MLX5_CAP_GENERAL,
+ MLX5_CAP_GENERAL_2,
+ MLX5_CAP_ETHERNET_OFFLOADS,
+ MLX5_CAP_IPOIB_ENHANCED_OFFLOADS,
+ MLX5_CAP_ODP,
+ MLX5_CAP_ATOMIC,
+ MLX5_CAP_ROCE,
+ MLX5_CAP_IPOIB_OFFLOADS,
+ MLX5_CAP_FLOW_TABLE,
+ MLX5_CAP_ESWITCH_FLOW_TABLE,
+ MLX5_CAP_ESWITCH,
+ MLX5_CAP_VECTOR_CALC,
+ MLX5_CAP_QOS,
+ MLX5_CAP_DEBUG,
+ MLX5_CAP_DEV_MEM,
+ MLX5_CAP_DEV_EVENT,
+ MLX5_CAP_TLS,
+ MLX5_CAP_VDPA_EMULATION,
+ MLX5_CAP_IPSEC,
+ MLX5_CAP_PORT_SELECTION,
+ MLX5_CAP_DEV_SHAMPO,
+ MLX5_CAP_MACSEC,
+ MLX5_CAP_ADV_VIRTUALIZATION,
+};
+
+static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
+{
+ int type;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
+ type = types[i];
+ kfree(dev->caps.hca[type]);
+ }
+}
+
+static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_hca_cap *cap;
+ int type;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
+ cap = kzalloc(sizeof(*cap), GFP_KERNEL);
+ if (!cap)
+ goto err;
+ type = types[i];
+ dev->caps.hca[type] = cap;
+ }
+
+ return 0;
+
+err:
+ mlx5_hca_caps_free(dev);
+ return -ENOMEM;
+}
+
+int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int err;
+
+ memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
+ INIT_LIST_HEAD(&priv->ctx_list);
+ spin_lock_init(&priv->ctx_lock);
+ lockdep_register_key(&dev->lock_key);
+ mutex_init(&dev->intf_state_mutex);
+ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
+
+ mutex_init(&priv->bfregs.reg_head.lock);
+ mutex_init(&priv->bfregs.wc_head.lock);
+ INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
+ INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
+
+ mutex_init(&priv->alloc_mutex);
+ mutex_init(&priv->pgdir_mutex);
+ INIT_LIST_HEAD(&priv->pgdir_list);
+
+ priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
+ priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device),
+ mlx5_debugfs_root);
+ INIT_LIST_HEAD(&priv->traps);
+
+ err = mlx5_tout_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
+ goto err_timeout_init;
+ }
+
+ err = mlx5_health_init(dev);
+ if (err)
+ goto err_health_init;
+
+ err = mlx5_pagealloc_init(dev);
+ if (err)
+ goto err_pagealloc_init;
+
+ err = mlx5_adev_init(dev);
+ if (err)
+ goto err_adev_init;
+
+ err = mlx5_hca_caps_alloc(dev);
+ if (err)
+ goto err_hca_caps;
+
+	/* The combination of sw_vhca_id and sw_owner_id is a globally
+	 * unique id per function that uses mlx5_core.
+	 * Both values are supplied to FW as part of the init HCA command to
+	 * be used by both driver and FW where applicable.
+	 */
+ dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1,
+ MAX_SW_VHCA_ID,
+ GFP_KERNEL);
+ if (dev->priv.sw_vhca_id < 0)
+ mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n",
+ dev->priv.sw_vhca_id);
+
+ return 0;
+
+err_hca_caps:
+ mlx5_adev_cleanup(dev);
+err_adev_init:
+ mlx5_pagealloc_cleanup(dev);
+err_pagealloc_init:
+ mlx5_health_cleanup(dev);
+err_health_init:
+ mlx5_tout_cleanup(dev);
+err_timeout_init:
+ debugfs_remove(dev->priv.dbg.dbg_root);
+ mutex_destroy(&priv->pgdir_mutex);
+ mutex_destroy(&priv->alloc_mutex);
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
+ return err;
+}
+
+void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
+ if (priv->sw_vhca_id > 0)
+ ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id);
+
+ mlx5_hca_caps_free(dev);
+ mlx5_adev_cleanup(dev);
+ mlx5_pagealloc_cleanup(dev);
+ mlx5_health_cleanup(dev);
+ mlx5_tout_cleanup(dev);
+ debugfs_remove_recursive(dev->priv.dbg.dbg_root);
+ mutex_destroy(&priv->pgdir_mutex);
+ mutex_destroy(&priv->alloc_mutex);
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
+}
+
+static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct mlx5_core_dev *dev;
+ struct devlink *devlink;
+ int err;
+
+ devlink = mlx5_devlink_alloc(&pdev->dev);
+ if (!devlink) {
+ dev_err(&pdev->dev, "devlink alloc failed\n");
+ return -ENOMEM;
+ }
+
+ dev = devlink_priv(devlink);
+ dev->device = &pdev->dev;
+ dev->pdev = pdev;
+
+ dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF;
+
+ dev->priv.adev_idx = mlx5_adev_idx_alloc();
+ if (dev->priv.adev_idx < 0) {
+ err = dev->priv.adev_idx;
+ goto adev_init_err;
+ }
+
+ err = mlx5_mdev_init(dev, prof_sel);
+ if (err)
+ goto mdev_init_err;
+
+ err = mlx5_pci_init(dev, pdev, id);
+ if (err) {
+ mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n",
+ err);
+ goto pci_init_err;
+ }
+
+ err = mlx5_init_one(dev);
+ if (err) {
+ mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
+ err);
+ goto err_init_one;
+ }
+
+ err = mlx5_crdump_enable(dev);
+ if (err)
+ dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+ pci_save_state(pdev);
+ devlink_register(devlink);
+ return 0;
+
+err_init_one:
+ mlx5_pci_close(dev);
+pci_init_err:
+ mlx5_mdev_uninit(dev);
+mdev_init_err:
+ mlx5_adev_idx_free(dev->priv.adev_idx);
+adev_init_err:
+ mlx5_devlink_free(devlink);
+
+ return err;
+}
+
+static void remove_one(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
+ /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
+ * devlink notify APIs.
+ * Hence, we must drain them before unregistering the devlink.
+ */
+ mlx5_drain_fw_reset(dev);
+ mlx5_drain_health_wq(dev);
+ devlink_unregister(devlink);
+ mlx5_sriov_disable(pdev);
+ mlx5_crdump_disable(dev);
+ mlx5_uninit_one(dev);
+ mlx5_pci_close(dev);
+ mlx5_mdev_uninit(dev);
+ mlx5_adev_idx_free(dev->priv.adev_idx);
+ mlx5_devlink_free(devlink);
+}
+
+#define mlx5_pci_trace(dev, fmt, ...) ({ \
+ struct mlx5_core_dev *__dev = (dev); \
+ mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \
+ __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \
+ __dev->pci_status, ##__VA_ARGS__); \
+})
+
+static const char *result2str(enum pci_ers_result result)
+{
+ return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" :
+ result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" :
+ result == PCI_ERS_RESULT_RECOVERED ? "recovered" :
+ "unknown";
+}
+
+static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ enum pci_ers_result res;
+
+ mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state);
+
+ mlx5_enter_error_state(dev, false);
+ mlx5_error_sw_reset(dev);
+ mlx5_unload_one(dev, false);
+ mlx5_drain_health_wq(dev);
+ mlx5_pci_disable_device(dev);
+
+ res = state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+
+ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n",
+ __func__, dev->state, dev->pci_status, res, result2str(res));
+ return res;
+}
+
+/* Wait for the device to show vital signs, i.e. for the health counter
+ * to start counting.
+ */
+static int wait_vital(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_health *health = &dev->priv.health;
+ const int niter = 100;
+ u32 last_count = 0;
+ u32 count;
+ int i;
+
+ for (i = 0; i < niter; i++) {
+ count = ioread32be(health->health_counter);
+ if (count && count != 0xffffffff) {
+ if (last_count && last_count != count) {
+ mlx5_core_info(dev,
+ "wait vital counter value 0x%x after %d iterations\n",
+ count, i);
+ return 0;
+ }
+ last_count = count;
+ }
+ msleep(50);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+{
+ enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n",
+ __func__, dev->state, dev->pci_status);
+
+ err = mlx5_pci_enable_device(dev);
+ if (err) {
+ mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ err = wait_vital(pdev);
+ if (err) {
+ mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ res = PCI_ERS_RESULT_RECOVERED;
+out:
+ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n",
+ __func__, dev->state, dev->pci_status, err, res, result2str(res));
+ return res;
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+ mlx5_pci_trace(dev, "Enter, loading driver..\n");
+
+ err = mlx5_load_one(dev, false);
+
+ if (!err)
+ devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+
+ mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
+ !err ? "recovered" : "Failed");
+}
+
+static const struct pci_error_handlers mlx5_err_handler = {
+ .error_detected = mlx5_pci_err_detected,
+ .slot_reset = mlx5_pci_slot_reset,
+ .resume = mlx5_pci_resume
+};
+
+static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
+{
+ bool fast_teardown = false, force_teardown = false;
+ int ret = 1;
+
+ fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
+ force_teardown = MLX5_CAP_GEN(dev, force_teardown);
+
+ mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
+ mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
+
+ if (!fast_teardown && !force_teardown)
+ return -EOPNOTSUPP;
+
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
+ return -EAGAIN;
+ }
+
+ /* Panic tear down fw command will stop the PCI bus communication
+ * with the HCA, so the health poll is no longer needed.
+ */
+ mlx5_drain_health_wq(dev);
+ mlx5_stop_health_poll(dev, false);
+
+ ret = mlx5_cmd_fast_teardown_hca(dev);
+ if (!ret)
+ goto succeed;
+
+ ret = mlx5_cmd_force_teardown_hca(dev);
+ if (!ret)
+ goto succeed;
+
+ mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+ mlx5_start_health_poll(dev);
+ return ret;
+
+succeed:
+ mlx5_enter_error_state(dev, true);
+
+	/* Some platforms require freeing the IRQs in the shutdown
+	 * flow. If they aren't freed, they can't be allocated after
+	 * kexec. There is no need to clean up the mlx5_core software
+	 * contexts.
+	 */
+ mlx5_core_eq_free_irqs(dev);
+
+ return 0;
+}
+
+static void shutdown(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+ mlx5_core_info(dev, "Shutdown was called\n");
+ set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
+ err = mlx5_try_fast_unload(dev);
+ if (err)
+ mlx5_unload_one(dev, false);
+ mlx5_pci_disable_device(dev);
+}
+
+static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+
+ mlx5_unload_one(dev, true);
+
+ return 0;
+}
+
+static int mlx5_resume(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+
+ return mlx5_load_one(dev, false);
+}
+
+static const struct pci_device_id mlx5_core_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
+ { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
+ { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
+ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */
+ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */
+ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */
+ { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */
+ { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */
+ { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */
+ { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */
+ { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */
+ { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
+ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
+ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
+ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
+ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+ mlx5_error_sw_reset(dev);
+ mlx5_unload_one_devl_locked(dev, false);
+}
+
+int mlx5_recover_device(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_sf(dev)) {
+ mlx5_pci_disable_device(dev);
+ if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
+ return -EIO;
+ }
+
+ return mlx5_load_one_devl_locked(dev, true);
+}
+
+static struct pci_driver mlx5_core_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = mlx5_core_pci_table,
+ .probe = probe_one,
+ .remove = remove_one,
+ .suspend = mlx5_suspend,
+ .resume = mlx5_resume,
+ .shutdown = shutdown,
+ .err_handler = &mlx5_err_handler,
+ .sriov_configure = mlx5_core_sriov_configure,
+ .sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix,
+ .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
+};
+
+/**
+ * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
+ * mlx5_core is its driver.
+ * @pdev: The associated PCI device.
+ *
+ * Upon return the interface state lock stays held so the caller can use the
+ * device safely. The caller must use the returned mlx5 device only for a
+ * narrow window and put it back with mlx5_vf_put_core_dev() immediately once
+ * usage is over.
+ *
+ * Return: Pointer to the associated mlx5_core_dev or NULL.
+ */
+struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
+ if (IS_ERR(mdev))
+ return NULL;
+
+ mutex_lock(&mdev->intf_state_mutex);
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
+ mutex_unlock(&mdev->intf_state_mutex);
+ return NULL;
+ }
+
+ return mdev;
+}
+EXPORT_SYMBOL(mlx5_vf_get_core_dev);
+
+/**
+ * mlx5_vf_put_core_dev - Put the mlx5 core device back.
+ * @mdev: The mlx5 core device.
+ *
+ * Upon return the interface state lock is unlocked and the caller must not
+ * access the mdev anymore.
+ */
+void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
+{
+ mutex_unlock(&mdev->intf_state_mutex);
+}
+EXPORT_SYMBOL(mlx5_vf_put_core_dev);
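+
+/* Usage sketch (illustrative only, not taken from the upstream sources): a
+ * driver bound to the VF that needs a short look at the parent PF's mlx5
+ * core device would pair the two helpers above roughly as follows, where
+ * vf_pdev is a hypothetical VF pci_dev owned by the caller:
+ *
+ *	struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(vf_pdev);
+ *
+ *	if (mdev) {
+ *		use_mdev_briefly(mdev);
+ *		mlx5_vf_put_core_dev(mdev);
+ *	}
+ *
+ * use_mdev_briefly() stands for the caller's own short, bounded use of the
+ * device while the interface state lock is held.
+ */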
+
+static void mlx5_core_verify_params(void)
+{
+ if (prof_sel >= ARRAY_SIZE(profile)) {
+ pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n",
+ prof_sel,
+ ARRAY_SIZE(profile) - 1,
+ MLX5_DEFAULT_PROF);
+ prof_sel = MLX5_DEFAULT_PROF;
+ }
+}
+
+static int __init mlx5_init(void)
+{
+ int err;
+
+ WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME),
+ "mlx5_core name not in sync with kernel module name");
+
+ get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
+
+ mlx5_core_verify_params();
+ mlx5_register_debugfs();
+
+ err = mlx5e_init();
+ if (err)
+ goto err_debug;
+
+ err = mlx5_sf_driver_register();
+ if (err)
+ goto err_sf;
+
+ err = pci_register_driver(&mlx5_core_driver);
+ if (err)
+ goto err_pci;
+
+ return 0;
+
+err_pci:
+ mlx5_sf_driver_unregister();
+err_sf:
+ mlx5e_cleanup();
+err_debug:
+ mlx5_unregister_debugfs();
+ return err;
+}
+
+static void __exit mlx5_cleanup(void)
+{
+ pci_unregister_driver(&mlx5_core_driver);
+ mlx5_sf_driver_unregister();
+ mlx5e_cleanup();
+ mlx5_unregister_debugfs();
+}
+
+module_init(mlx5_init);
+module_exit(mlx5_cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
new file mode 100644
index 000000000..495cca58d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+
+int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+ MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+ gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec_in(dev, attach_to_mcg, in);
+}
+EXPORT_SYMBOL(mlx5_core_attach_mcg);
+
+int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+ gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec_in(dev, detach_from_mcg, in);
+}
+EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
new file mode 100644
index 000000000..0b560e97a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_CORE_H__
+#define __MLX5_CORE_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/if_link.h>
+#include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+
+extern uint mlx5_core_debug_mask;
+
+#define mlx5_core_dbg(__dev, format, ...) \
+ dev_dbg((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_dbg_once(__dev, format, ...) \
+ dev_dbg_once((__dev)->device, \
+ "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_dbg_mask(__dev, mask, format, ...) \
+do { \
+ if ((mask) & mlx5_core_debug_mask) \
+ mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \
+} while (0)
+
+#define mlx5_core_err(__dev, format, ...) \
+ dev_err((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_err_rl(__dev, format, ...) \
+ dev_err_ratelimited((__dev)->device, \
+ "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_warn(__dev, format, ...) \
+ dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_warn_once(__dev, format, ...) \
+ dev_warn_once((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_warn_rl(__dev, format, ...) \
+ dev_warn_ratelimited((__dev)->device, \
+ "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_info(__dev, format, ...) \
+ dev_info((__dev)->device, format, ##__VA_ARGS__)
+
+#define mlx5_core_info_rl(__dev, format, ...) \
+ dev_info_ratelimited((__dev)->device, \
+ "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...)
+{
+ struct device *device = dev->device;
+ struct va_format vaf;
+ va_list args;
+
+ if (WARN_ONCE(level < LOGLEVEL_EMERG || level > LOGLEVEL_DEBUG,
+ "Level %d is out of range, set to default level\n", level))
+ level = LOGLEVEL_DEFAULT;
+
+ va_start(args, format);
+ vaf.fmt = format;
+ vaf.va = &args;
+
+ dev_printk_emit(level, device, "%s %s: %pV", dev_driver_string(device), dev_name(device),
+ &vaf);
+ va_end(args);
+}
+
+#define mlx5_log(__dev, level, format, ...) \
+ mlx5_printk(__dev, level, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev)
+{
+ return &dev->pdev->dev;
+}
+
+enum {
+ MLX5_CMD_DATA, /* print command payload only */
+ MLX5_CMD_TIME, /* print command execution time */
+};
+
+enum {
+ MLX5_DRIVER_STATUS_ABORTED = 0xfe,
+ MLX5_DRIVER_SYND = 0xbadd00de,
+};
+
+enum mlx5_semaphore_space_address {
+ MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA,
+ MLX5_SEMAPHORE_SW_RESET = 0x20,
+};
+
+#define MLX5_DEFAULT_PROF 2
+
+static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed,
+ size_t item_size, size_t num_items,
+ const char *func, int line)
+{
+ int inlen;
+
+ if (fixed > INT_MAX || item_size > INT_MAX || num_items > INT_MAX) {
+ mlx5_core_err(dev, "%s: %s:%d: input values too big: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ if (check_mul_overflow((int)item_size, (int)num_items, &inlen)) {
+ mlx5_core_err(dev, "%s: %s:%d: multiplication overflow: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ if (check_add_overflow((int)fixed, inlen, &inlen)) {
+ mlx5_core_err(dev, "%s: %s:%d: addition overflow: %zu + %zu * %zu\n",
+ __func__, func, line, fixed, item_size, num_items);
+ return -ENOMEM;
+ }
+
+ return inlen;
+}
+
+#define MLX5_FLEXIBLE_INLEN(dev, fixed, item_size, num_items) \
+ mlx5_flexible_inlen(dev, fixed, item_size, num_items, __func__, __LINE__)
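+
+/* Usage sketch (illustrative only, not taken from the upstream sources):
+ * callers size variable-length command buffers with the macro above, e.g.
+ * for a command that carries num_entries 64-bit entries after a fixed
+ * header of fixed_hdr_sz bytes (both values hypothetical):
+ *
+ *	inlen = MLX5_FLEXIBLE_INLEN(dev, fixed_hdr_sz, sizeof(u64), num_entries);
+ *	if (inlen < 0)
+ *		return inlen;
+ *	in = kvzalloc(inlen, GFP_KERNEL);
+ *
+ * A negative return means the requested size overflowed and was rejected.
+ */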
+
+int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
+int mlx5_query_board_id(struct mlx5_core_dev *dev);
+int mlx5_cmd_init(struct mlx5_core_dev *dev);
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
+void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
+ enum mlx5_cmdif_state cmdif_state);
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
+void mlx5_error_sw_reset(struct mlx5_core_dev *dev);
+u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev);
+int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev);
+void mlx5_disable_device(struct mlx5_core_dev *dev);
+int mlx5_recover_device(struct mlx5_core_dev *dev);
+int mlx5_sriov_init(struct mlx5_core_dev *dev);
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
+int mlx5_sriov_attach(struct mlx5_core_dev *dev);
+void mlx5_sriov_detach(struct mlx5_core_dev *dev);
+int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+void mlx5_sriov_disable(struct pci_dev *pdev);
+int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *context, u32 *element_id);
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *context, u32 element_id,
+ u32 modify_bitmask);
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ u32 element_id);
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
+
+void mlx5_cmd_flush(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
+ u8 access_reg_group);
+int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group,
+ u8 access_reg_group);
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+ u8 feature_group, u8 access_reg_group);
+
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
+
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
+int mlx5_adev_idx_alloc(void);
+void mlx5_adev_idx_free(int idx);
+void mlx5_adev_cleanup(struct mlx5_core_dev *dev);
+int mlx5_adev_init(struct mlx5_core_dev *dev);
+
+int mlx5_attach_device(struct mlx5_core_dev *dev);
+void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend);
+int mlx5_register_device(struct mlx5_core_dev *dev);
+void mlx5_unregister_device(struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
+void mlx5_dev_list_lock(void);
+void mlx5_dev_list_unlock(void);
+int mlx5_dev_list_trylock(void);
+
+int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size);
+int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
+int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
+int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
+
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev);
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev);
+
+#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \
+ MLX5_CAP_GEN((mdev), pps_modify) && \
+ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \
+ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
+
+int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw,
+ struct netlink_ext_ack *extack);
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+ u32 *running_ver, u32 *stored_ver);
+
+#ifdef CONFIG_MLX5_CORE_EN
+int mlx5e_init(void);
+void mlx5e_cleanup(void);
+#else
+static inline int mlx5e_init(void){ return 0; }
+static inline void mlx5e_cleanup(void){}
+#endif
+
+static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
+{
+ return pci_num_vf(dev->pdev) ? true : false;
+}
+
+int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev);
+static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev)
+{
+ int ret;
+
+ mlx5_dev_list_lock();
+ ret = mlx5_rescan_drivers_locked(dev);
+ mlx5_dev_list_unlock();
+ return ret;
+}
+
+void mlx5_lag_update(struct mlx5_core_dev *dev);
+
+enum {
+ MLX5_NIC_IFC_FULL = 0,
+ MLX5_NIC_IFC_DISABLED = 1,
+ MLX5_NIC_IFC_NO_DRAM_NIC = 2,
+ MLX5_NIC_IFC_SW_RESET = 7
+};
+
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state);
+
+static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev)
+{
+ return dev->coredev_type == MLX5_COREDEV_SF;
+}
+
+int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx);
+void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
+int mlx5_init_one(struct mlx5_core_dev *dev);
+void mlx5_uninit_one(struct mlx5_core_dev *dev);
+void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend);
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend);
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
+
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
+
+void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
+static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+
+ return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+}
+
+bool mlx5_eth_supported(struct mlx5_core_dev *dev);
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev);
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev);
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev);
+
+#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
new file mode 100644
index 000000000..2e728e4e8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_IRQ_H__
+#define __MLX5_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_COMP_EQS_PER_SF 8
+
+struct mlx5_irq;
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
+struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
+
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
+ int msix_vec_count);
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
+ struct cpumask *affinity);
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs);
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
+int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
+int mlx5_irq_get_index(struct mlx5_irq *irq);
+
+struct mlx5_irq_pool;
+#ifdef CONFIG_MLX5_SF
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs);
+struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask);
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs);
+#else
+static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev,
+ struct mlx5_irq **irqs, int num_irqs) {}
+#endif
+#endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
new file mode 100644
index 000000000..9d735c343
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
+ int inlen)
+{
+ u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
+ u32 mkey_index;
+ int err;
+
+ MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+
+ err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
+ if (err)
+ return err;
+
+ mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
+ *mkey = MLX5_GET(create_mkey_in, in, memory_key_mkey_entry.mkey_7_0) |
+ mlx5_idx_to_mkey(mkey_index);
+
+ mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, *mkey);
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_core_create_mkey);
+
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
+
+ MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
+ return mlx5_cmd_exec_in(dev, destroy_mkey, in);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_mkey);
+
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {};
+
+ memset(out, 0, outlen);
+ MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
+ MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_mkey);
+
+static inline u32 mlx5_get_psv(u32 *out, int psv_index)
+{
+ switch (psv_index) {
+ case 1: return MLX5_GET(create_psv_out, out, psv1_index);
+ case 2: return MLX5_GET(create_psv_out, out, psv2_index);
+ case 3: return MLX5_GET(create_psv_out, out, psv3_index);
+ default: return MLX5_GET(create_psv_out, out, psv0_index);
+ }
+}
+
+int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
+ int npsvs, u32 *sig_index)
+{
+ u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {};
+ int i, err;
+
+ if (npsvs > MLX5_MAX_PSVS)
+ return -EINVAL;
+
+ MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV);
+ MLX5_SET(create_psv_in, in, pd, pdn);
+ MLX5_SET(create_psv_in, in, num_psv, npsvs);
+
+ err = mlx5_cmd_exec_inout(dev, create_psv, in, out);
+ if (err)
+ return err;
+
+ for (i = 0; i < npsvs; i++)
+ sig_index[i] = mlx5_get_psv(out, i);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_psv);
+
+int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {};
+
+ MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, in, psvn, psv_num);
+ return mlx5_cmd_exec_in(dev, destroy_psv, in);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_psv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
new file mode 100644
index 000000000..95dc67fb3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -0,0 +1,796 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/mlx5/driver.h>
+#include <linux/xarray.h>
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/tout.h"
+
+enum {
+ MLX5_PAGES_CANT_GIVE = 0,
+ MLX5_PAGES_GIVE = 1,
+ MLX5_PAGES_TAKE = 2
+};
+
+struct mlx5_pages_req {
+ struct mlx5_core_dev *dev;
+ u16 func_id;
+ u8 ec_function;
+ s32 npages;
+ struct work_struct work;
+ u8 release_all;
+};
+
+struct fw_page {
+ struct rb_node rb_node;
+ u64 addr;
+ struct page *page;
+ u32 function;
+ unsigned long bitmask;
+ struct list_head list;
+ unsigned int free_count;
+};
+
+enum {
+ MLX5_MAX_RECLAIM_TIME_MILI = 5000,
+ MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+};
+
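+/* FW pages are tracked per function: the tracking key packs func_id into
+ * bits 0..15 and ec_function into bit 16, and is used below as the
+ * page_root_xa index.
+ */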
+static u32 get_function(u16 func_id, bool ec_function)
+{
+ return (u32)func_id | (ec_function << 16);
+}
+
+static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function)
+{
+ if (!func_id)
+ return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF;
+
+ return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF;
+}
+
+static u32 mlx5_get_ec_function(u32 function)
+{
+ return function >> 16;
+}
+
+static u32 mlx5_get_func_id(u32 function)
+{
+ return function & 0xffff;
+}
+
+static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function)
+{
+ struct rb_root *root;
+ int err;
+
+ root = xa_load(&dev->priv.page_root_xa, function);
+ if (root)
+ return root;
+
+ root = kzalloc(sizeof(*root), GFP_KERNEL);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+
+ err = xa_insert(&dev->priv.page_root_xa, function, root, GFP_KERNEL);
+ if (err) {
+ kfree(root);
+ return ERR_PTR(err);
+ }
+
+ *root = RB_ROOT;
+
+ return root;
+}
+
+static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u32 function)
+{
+ struct rb_node *parent = NULL;
+ struct rb_root *root;
+ struct rb_node **new;
+ struct fw_page *nfp;
+ struct fw_page *tfp;
+ int i;
+
+ root = page_root_per_function(dev, function);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+
+ new = &root->rb_node;
+
+ while (*new) {
+ parent = *new;
+ tfp = rb_entry(parent, struct fw_page, rb_node);
+ if (tfp->addr < addr)
+ new = &parent->rb_left;
+ else if (tfp->addr > addr)
+ new = &parent->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
+ if (!nfp)
+ return -ENOMEM;
+
+ nfp->addr = addr;
+ nfp->page = page;
+ nfp->function = function;
+ nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+ for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+ set_bit(i, &nfp->bitmask);
+
+ rb_link_node(&nfp->rb_node, parent, new);
+ rb_insert_color(&nfp->rb_node, root);
+ list_add(&nfp->list, &dev->priv.free_list);
+
+ return 0;
+}
+
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr,
+ u32 function)
+{
+ struct fw_page *result = NULL;
+ struct rb_root *root;
+ struct rb_node *tmp;
+ struct fw_page *tfp;
+
+ root = xa_load(&dev->priv.page_root_xa, function);
+ if (WARN_ON_ONCE(!root))
+ return NULL;
+
+ tmp = root->rb_node;
+
+ while (tmp) {
+ tfp = rb_entry(tmp, struct fw_page, rb_node);
+ if (tfp->addr < addr) {
+ tmp = tmp->rb_left;
+ } else if (tfp->addr > addr) {
+ tmp = tmp->rb_right;
+ } else {
+ result = tfp;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
+ s32 *npages, int boot)
+{
+ u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {};
+ int err;
+
+ MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+ MLX5_SET(query_pages_in, in, op_mod, boot ?
+ MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
+ MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
+ MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
+
+ err = mlx5_cmd_exec_inout(dev, query_pages, in, out);
+ if (err)
+ return err;
+
+ *npages = MLX5_GET(query_pages_out, out, num_pages);
+ *func_id = MLX5_GET(query_pages_out, out, function_id);
+
+ return err;
+}
+
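+/* Hand out one MLX5_ADAPTER_PAGE_SIZE chunk from a previously allocated
+ * system page: each fw_page tracks its free chunks in a bitmask, and pages
+ * with free chunks sit on priv.free_list; pick one that belongs to the
+ * requested function.
+ */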
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function)
+{
+ struct fw_page *fp = NULL;
+ struct fw_page *iter;
+ unsigned n;
+
+ list_for_each_entry(iter, &dev->priv.free_list, list) {
+ if (iter->function != function)
+ continue;
+ fp = iter;
+ }
+
+ if (list_empty(&dev->priv.free_list) || !fp)
+ return -ENOMEM;
+
+ n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+ if (n >= MLX5_NUM_4K_IN_PAGE) {
+ mlx5_core_warn(dev, "alloc 4k bug: fw page = 0x%llx, n = %u, bitmask: %lu, max num of 4K pages: %d\n",
+ fp->addr, n, fp->bitmask, MLX5_NUM_4K_IN_PAGE);
+ return -ENOENT;
+ }
+ clear_bit(n, &fp->bitmask);
+ fp->free_count--;
+ if (!fp->free_count)
+ list_del(&fp->list);
+
+ *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
+
+ return 0;
+}
+
+#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
+
+static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
+ bool in_free_list)
+{
+ struct rb_root *root;
+
+ root = xa_load(&dev->priv.page_root_xa, fwp->function);
+ if (WARN_ON_ONCE(!root))
+ return;
+
+ rb_erase(&fwp->rb_node, root);
+ if (in_free_list)
+ list_del(&fwp->list);
+ dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(fwp->page);
+ kfree(fwp);
+}
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function)
+{
+ struct fw_page *fwp;
+ int n;
+
+ fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function);
+ if (!fwp) {
+ mlx5_core_warn_rl(dev, "page not found\n");
+ return;
+ }
+ n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+ fwp->free_count++;
+ set_bit(n, &fwp->bitmask);
+ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE)
+ free_fwp(dev, fwp, fwp->free_count != 1);
+ else if (fwp->free_count == 1)
+ list_add(&fwp->list, &dev->priv.free_list);
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u32 function)
+{
+ struct device *device = mlx5_core_dma_dev(dev);
+ int nid = dev_to_node(device);
+ struct page *page;
+ u64 zero_addr = 1;
+ u64 addr;
+ int err;
+
+ page = alloc_pages_node(nid, GFP_HIGHUSER, 0);
+ if (!page) {
+ mlx5_core_warn(dev, "failed to allocate page\n");
+ return -ENOMEM;
+ }
+map:
+ addr = dma_map_page(device, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(device, addr)) {
+ mlx5_core_warn(dev, "failed dma mapping page\n");
+ err = -ENOMEM;
+ goto err_mapping;
+ }
+
+	/* Firmware doesn't support a page with physical address 0 */
+ if (addr == 0) {
+ zero_addr = addr;
+ goto map;
+ }
+
+ err = insert_page(dev, addr, page, function);
+ if (err) {
+ mlx5_core_err(dev, "failed to track allocated page\n");
+ dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ }
+
+err_mapping:
+ if (err)
+ __free_page(page);
+
+ if (zero_addr == 0)
+ dma_unmap_page(device, zero_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return err;
+}
+
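+/* Notify FW (op_mod MLX5_PAGES_CANT_GIVE) that a page give request could
+ * not be satisfied.
+ */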
+static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
+ bool ec_function)
+{
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
+ int err;
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
+
+ err = mlx5_cmd_exec_in(dev, manage_pages, in);
+ if (err)
+ mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
+ func_id, err);
+}
+
+static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ int event, bool ec_function)
+{
+ u32 function = get_function(func_id, ec_function);
+ u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+ int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
+ int notify_fail = event;
+ u16 func_type;
+ u64 addr;
+ int err;
+ u32 *in;
+ int i;
+
+ inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
+ goto out_free;
+ }
+
+ for (i = 0; i < npages; i++) {
+retry:
+ err = alloc_4k(dev, &addr, function);
+ if (err) {
+ if (err == -ENOMEM)
+ err = alloc_system_page(dev, function);
+ if (err) {
+ dev->priv.fw_pages_alloc_failed += (npages - i);
+ goto out_4k;
+ }
+
+ goto retry;
+ }
+ MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
+ }
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+ MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
+
+ err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out));
+ if (err == -EREMOTEIO) {
+ notify_fail = 0;
+ /* if triggered by FW and failed by FW ignore */
+ if (event) {
+ err = 0;
+ goto out_dropped;
+ }
+ }
+ err = mlx5_cmd_check(dev, err, in, out);
+ if (err) {
+ mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
+ func_id, npages, err);
+ goto out_dropped;
+ }
+
+ func_type = func_id_to_type(dev, func_id, ec_function);
+ dev->priv.page_counters[func_type] += npages;
+ dev->priv.fw_pages += npages;
+
+ mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
+ npages, ec_function, func_id, err);
+
+ kvfree(in);
+ return 0;
+
+out_dropped:
+ dev->priv.give_pages_dropped += npages;
+out_4k:
+ for (i--; i >= 0; i--)
+ free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function);
+out_free:
+ kvfree(in);
+ if (notify_fail)
+ page_notify_fail(dev, func_id, ec_function);
+ return err;
+}
+
+static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id,
+ bool ec_function)
+{
+ u32 function = get_function(func_id, ec_function);
+ struct rb_root *root;
+ struct rb_node *p;
+ int npages = 0;
+ u16 func_type;
+
+ root = xa_load(&dev->priv.page_root_xa, function);
+ if (WARN_ON_ONCE(!root))
+ return;
+
+ p = rb_first(root);
+ while (p) {
+ struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);
+
+ p = rb_next(p);
+ npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count);
+ free_fwp(dev, fwp, fwp->free_count);
+ }
+
+ func_type = func_id_to_type(dev, func_id, ec_function);
+ dev->priv.page_counters[func_type] -= npages;
+ dev->priv.fw_pages -= npages;
+
+ mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n",
+ npages, ec_function, func_id);
+}
+
+static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index,
+ u32 npages)
+{
+ u32 pages_set = 0;
+ unsigned int n;
+
+ for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) {
+ MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set,
+ fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE));
+ pages_set++;
+
+ if (!--npages)
+ break;
+ }
+
+ return pages_set;
+}
+
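+/* When the command interface is down, synthesize the manage_pages output
+ * locally from the page rb-tree instead of going to FW, so the pages can
+ * still be reclaimed.
+ */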
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+ u32 *in, int in_size, u32 *out, int out_size)
+{
+ struct rb_root *root;
+ struct fw_page *fwp;
+ struct rb_node *p;
+ bool ec_function;
+ u32 func_id;
+ u32 npages;
+ u32 i = 0;
+
+ if (!mlx5_cmd_is_down(dev))
+ return mlx5_cmd_do(dev, in, in_size, out, out_size);
+
+ /* No hard feelings, we want our pages back! */
+ npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+ func_id = MLX5_GET(manage_pages_in, in, function_id);
+ ec_function = MLX5_GET(manage_pages_in, in, embedded_cpu_function);
+
+ root = xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function));
+ if (WARN_ON_ONCE(!root))
+ return -EEXIST;
+
+ p = rb_first(root);
+ while (p && i < npages) {
+ fwp = rb_entry(p, struct fw_page, rb_node);
+ p = rb_next(p);
+
+ i += fwp_fill_manage_pages_out(fwp, out, i, npages - i);
+ }
+
+ MLX5_SET(manage_pages_out, out, output_num_entries, i);
+ return 0;
+}
+
+static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ int *nclaimed, bool event, bool ec_function)
+{
+ u32 function = get_function(func_id, ec_function);
+ int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
+ int num_claimed;
+ u16 func_type;
+ u32 *out;
+ int err;
+ int i;
+
+ if (nclaimed)
+ *nclaimed = 0;
+
+ outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+ MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
+
+ mlx5_core_dbg(dev, "func 0x%x, npages %d, outlen %d\n",
+ func_id, npages, outlen);
+ err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
+ if (err) {
+ npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+ dev->priv.reclaim_pages_discard += npages;
+ }
+ /* if triggered by FW event and failed by FW then ignore */
+ if (event && err == -EREMOTEIO) {
+ err = 0;
+ goto out_free;
+ }
+
+ err = mlx5_cmd_check(dev, err, in, out);
+ if (err) {
+ mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
+ goto out_free;
+ }
+
+ num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
+ if (num_claimed > npages) {
+ mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
+ num_claimed, npages);
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ for (i = 0; i < num_claimed; i++)
+ free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), function);
+
+ if (nclaimed)
+ *nclaimed = num_claimed;
+
+ func_type = func_id_to_type(dev, func_id, ec_function);
+ dev->priv.page_counters[func_type] -= num_claimed;
+ dev->priv.fw_pages -= num_claimed;
+
+out_free:
+ kvfree(out);
+ return err;
+}
+
+static void pages_work_handler(struct work_struct *work)
+{
+ struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
+ struct mlx5_core_dev *dev = req->dev;
+ int err = 0;
+
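+	/* Negative npages means the FW is returning pages; positive means it
+	 * needs more.
+	 */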
+ if (req->release_all)
+ release_all_pages(dev, req->func_id, req->ec_function);
+ else if (req->npages < 0)
+ err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
+ true, req->ec_function);
+ else if (req->npages > 0)
+ err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
+
+ if (err)
+ mlx5_core_warn(dev, "%s fail %d\n",
+ req->npages < 0 ? "reclaim" : "give", err);
+
+ kfree(req);
+}
+
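+/* Flags carried in the upper bits of the ec_function field of a page
+ * request EQE.
+ */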
+enum {
+ EC_FUNCTION_MASK = 0x8000,
+ RELEASE_ALL_PAGES_MASK = 0x4000,
+};
+
+static int req_pages_handler(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_pages_req *req;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ bool ec_function;
+ bool release_all;
+ u16 func_id;
+ s32 npages;
+
+ priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ eqe = data;
+
+ func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+ npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+ ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
+ release_all = be16_to_cpu(eqe->data.req_pages.ec_function) &
+ RELEASE_ALL_PAGES_MASK;
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d, release_all %d\n",
+ func_id, npages, release_all);
+ req = kzalloc(sizeof(*req), GFP_ATOMIC);
+ if (!req) {
+ mlx5_core_warn(dev, "failed to allocate pages request\n");
+ return NOTIFY_DONE;
+ }
+
+ req->dev = dev;
+ req->func_id = func_id;
+ req->npages = npages;
+ req->ec_function = ec_function;
+ req->release_all = release_all;
+ INIT_WORK(&req->work, pages_work_handler);
+ queue_work(dev->priv.pg_wq, &req->work);
+ return NOTIFY_OK;
+}
+
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
+{
+ u16 func_id;
+ s32 npages;
+ int err;
+
+ err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
+ if (err)
+ return err;
+
+ mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
+ npages, boot ? "boot" : "init", func_id);
+
+ return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev));
+}
+
+enum {
+ MLX5_BLKS_FOR_RECLAIM_PAGES = 12
+};
+
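+/* Number of 4K page addresses that fit into a single reclaim command reply:
+ * the inline output area plus MLX5_BLKS_FOR_RECLAIM_PAGES mailbox blocks,
+ * minus the fixed manage_pages_out header, divided by the size of one PAS
+ * entry.
+ */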
+static int optimal_reclaimed_pages(void)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_layout *lay;
+ int ret;
+
+ ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
+ MLX5_ST_SZ_BYTES(manage_pages_out)) /
+ MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
+
+ return ret;
+}
+
+static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
+ struct rb_root *root, u32 function)
+{
+ u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES));
+ unsigned long end = jiffies + recl_pages_to_jiffies;
+
+ while (!RB_EMPTY_ROOT(root)) {
+ u32 ec_function = mlx5_get_ec_function(function);
+ u32 function_id = mlx5_get_func_id(function);
+ int nclaimed;
+ int err;
+
+ err = reclaim_pages(dev, function_id, optimal_reclaimed_pages(),
+ &nclaimed, false, ec_function);
+ if (err) {
+ mlx5_core_warn(dev, "reclaim_pages err (%d) func_id=0x%x ec_func=0x%x\n",
+ err, function_id, ec_function);
+ return err;
+ }
+
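+		/* As long as the FW keeps returning pages, keep extending the
+		 * deadline.
+		 */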
+ if (nclaimed)
+ end = jiffies + recl_pages_to_jiffies;
+
+ if (time_after(jiffies, end)) {
+ mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+ struct rb_root *root;
+ unsigned long id;
+ void *entry;
+
+ xa_for_each(&dev->priv.page_root_xa, id, entry) {
+ root = entry;
+ mlx5_reclaim_root_pages(dev, root, id);
+ xa_erase(&dev->priv.page_root_xa, id);
+ kfree(root);
+ }
+
+ WARN_ON(!xa_empty(&dev->priv.page_root_xa));
+
+ WARN(dev->priv.fw_pages,
+ "FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.fw_pages);
+ WARN(dev->priv.page_counters[MLX5_VF],
+ "VFs FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.page_counters[MLX5_VF]);
+ WARN(dev->priv.page_counters[MLX5_HOST_PF],
+ "External host PF FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.page_counters[MLX5_HOST_PF]);
+
+ return 0;
+}
+
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+{
+ INIT_LIST_HEAD(&dev->priv.free_list);
+ dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+ if (!dev->priv.pg_wq)
+ return -ENOMEM;
+
+ xa_init(&dev->priv.page_root_xa);
+ mlx5_pages_debugfs_init(dev);
+
+ return 0;
+}
+
+void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
+{
+ mlx5_pages_debugfs_cleanup(dev);
+ xa_destroy(&dev->priv.page_root_xa);
+ destroy_workqueue(dev->priv.pg_wq);
+}
+
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+{
+ MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+ mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
+}
+
+void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
+{
+ mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+ flush_workqueue(dev->priv.pg_wq);
+}
+
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
+{
+ u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES));
+ unsigned long end = jiffies + recl_vf_pages_to_jiffies;
+ int prev_pages = *pages;
+
+ /* In case of internal error we will free the pages manually later */
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_core_warn(dev, "Skipping wait for vf pages stage");
+ return 0;
+ }
+
+ mlx5_core_dbg(dev, "Waiting for %d pages\n", prev_pages);
+ while (*pages) {
+ if (time_after(jiffies, end)) {
+ mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages);
+ return -ETIMEDOUT;
+ }
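+		/* Progress was made; push the deadline out */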
+ if (*pages < prev_pages) {
+ end = jiffies + recl_vf_pages_to_jiffies;
+ prev_pages = *pages;
+ }
+ msleep(50);
+ }
+
+ mlx5_core_dbg(dev, "All pages received\n");
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
new file mode 100644
index 000000000..a6d3fc96e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -0,0 +1,733 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "pci_irq.h"
+#include "lib/sf.h"
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+
+#define MLX5_SFS_PER_CTRL_IRQ 64
+#define MLX5_IRQ_CTRL_SF_MAX 8
+/* min num of vectors for SFs to be enabled */
+#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+
+#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
+#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
+#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
+#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
+
+struct mlx5_irq {
+ struct atomic_notifier_head nh;
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_FORMATTED_NAME];
+ struct mlx5_irq_pool *pool;
+ int refcount;
+ u32 index;
+ int irqn;
+};
+
+struct mlx5_irq_table {
+ struct mlx5_irq_pool *pf_pool;
+ struct mlx5_irq_pool *sf_ctrl_pool;
+ struct mlx5_irq_pool *sf_comp_pool;
+};
+
+/**
+ * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
+ * to be assigned to each VF.
+ * @dev: PF to work on
+ * @num_vfs: Number of enabled VFs
+ */
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
+{
+ int num_vf_msix, min_msix, max_msix;
+
+ num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+ if (!num_vf_msix)
+ return 0;
+
+ min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
+ max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
+
+ /* Limit maximum number of MSI-X vectors so the default configuration
+ * has some available in the pool. This will allow the user to increase
+ * the number of vectors in a VF without having to first size-down other
+ * VFs.
+ */
+ return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
+}
+
+/**
+ * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
+ * @dev: PF to work on
+ * @function_id: Internal PCI VF function ID
+ * @msix_vec_count: Number of MSI-X vectors to set
+ */
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
+ int msix_vec_count)
+{
+ int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *hca_cap = NULL, *query_cap = NULL, *cap;
+ int num_vf_msix, min_msix, max_msix;
+ int ret;
+
+ num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+ if (!num_vf_msix)
+ return 0;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
+ return -EOPNOTSUPP;
+
+ min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
+ max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
+
+ if (msix_vec_count < min_msix)
+ return -EINVAL;
+
+ if (msix_vec_count > max_msix)
+ return -EOVERFLOW;
+
+ query_cap = kvzalloc(query_sz, GFP_KERNEL);
+ hca_cap = kvzalloc(set_sz, GFP_KERNEL);
+ if (!hca_cap || !query_cap) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
+ if (ret)
+ goto out;
+
+ cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+ memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
+
+ MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
+ MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
+
+ MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
+ ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
+out:
+ kvfree(hca_cap);
+ kvfree(query_cap);
+ return ret;
+}
+
+/* mlx5_system_free_irq - Free an IRQ
+ * @irq: IRQ to free
+ *
+ * Free the IRQ and other resources such as rmap from the system.
+ * It does not, however, free the mlx5 object or drop its reference.
+ * This function is important for the shutdown flow, where we need to
+ * clean up system resources but keep mlx5 objects alive,
+ * see mlx5_irq_table_free_irqs().
+ */
+static void mlx5_system_free_irq(struct mlx5_irq *irq)
+{
+	/* free_irq() requires that the affinity hint and rmap be cleared
+	 * before it is called. This is why there is an asymmetry with set_rmap,
+	 * which should be called after alloc_irq but before request_irq.
+ */
+ irq_update_affinity_hint(irq->irqn, NULL);
+ free_irq(irq->irqn, &irq->nh);
+}
+
+static void irq_release(struct mlx5_irq *irq)
+{
+ struct mlx5_irq_pool *pool = irq->pool;
+
+ xa_erase(&pool->irqs, irq->index);
+ mlx5_system_free_irq(irq);
+ free_cpumask_var(irq->mask);
+ kfree(irq);
+}
+
+int mlx5_irq_put(struct mlx5_irq *irq)
+{
+ struct mlx5_irq_pool *pool = irq->pool;
+ int ret = 0;
+
+ mutex_lock(&pool->lock);
+ irq->refcount--;
+ if (!irq->refcount) {
+ irq_release(irq);
+ ret = 1;
+ }
+ mutex_unlock(&pool->lock);
+ return ret;
+}
+
+int mlx5_irq_read_locked(struct mlx5_irq *irq)
+{
+ lockdep_assert_held(&irq->pool->lock);
+ return irq->refcount;
+}
+
+int mlx5_irq_get_locked(struct mlx5_irq *irq)
+{
+ lockdep_assert_held(&irq->pool->lock);
+ if (WARN_ON_ONCE(!irq->refcount))
+ return 0;
+ irq->refcount++;
+ return 1;
+}
+
+static int irq_get(struct mlx5_irq *irq)
+{
+ int err;
+
+ mutex_lock(&irq->pool->lock);
+ err = mlx5_irq_get_locked(irq);
+ mutex_unlock(&irq->pool->lock);
+ return err;
+}
+
+static irqreturn_t irq_int_handler(int irq, void *nh)
+{
+ atomic_notifier_call_chain(nh, 0, NULL);
+ return IRQ_HANDLED;
+}
+
+static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
+{
+ snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
+}
+
+static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
+{
+ if (!pool->xa_num_irqs.max) {
+ /* in case we only have a single irq for the device */
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
+ return;
+ }
+
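+	/* The last vector in the PF pool is reserved for async events */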
+ if (vecidx == pool->xa_num_irqs.max) {
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
+ return;
+ }
+
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
+}
+
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity)
+{
+ struct mlx5_core_dev *dev = pool->dev;
+ char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_irq *irq;
+ int err;
+
+ irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+ if (!irq)
+ return ERR_PTR(-ENOMEM);
+ irq->irqn = pci_irq_vector(dev->pdev, i);
+ if (!mlx5_irq_pool_is_sf_pool(pool))
+ irq_set_name(pool, name, i);
+ else
+ irq_sf_set_name(pool, name, i);
+ ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+ snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
+ MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
+ err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+ &irq->nh);
+ if (err) {
+ mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
+ goto err_req_irq;
+ }
+ if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+ mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
+ err = -ENOMEM;
+ goto err_cpumask;
+ }
+ if (affinity) {
+ cpumask_copy(irq->mask, affinity);
+ irq_set_affinity_and_hint(irq->irqn, irq->mask);
+ }
+ irq->pool = pool;
+ irq->refcount = 1;
+ irq->index = i;
+ err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
+ irq->index, err);
+ goto err_xa;
+ }
+ return irq;
+err_xa:
+ irq_update_affinity_hint(irq->irqn, NULL);
+ free_cpumask_var(irq->mask);
+err_cpumask:
+ free_irq(irq->irqn, &irq->nh);
+err_req_irq:
+ kfree(irq);
+ return ERR_PTR(err);
+}
+
+int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
+{
+ int ret;
+
+ ret = irq_get(irq);
+ if (!ret)
+		/* Something is very wrong here: we are enabling an EQ
+		 * on a non-existent IRQ.
+ */
+ return -ENOENT;
+ ret = atomic_notifier_chain_register(&irq->nh, nb);
+ if (ret)
+ mlx5_irq_put(irq);
+ return ret;
+}
+
+int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
+{
+ int err = 0;
+
+ err = atomic_notifier_chain_unregister(&irq->nh, nb);
+ mlx5_irq_put(irq);
+ return err;
+}
+
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
+{
+ return irq->mask;
+}
+
+int mlx5_irq_get_index(struct mlx5_irq *irq)
+{
+ return irq->index;
+}
+
+/* irq_pool API */
+
+/* requesting an irq from a given pool according to given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+ struct cpumask *affinity)
+{
+ struct mlx5_irq *irq;
+
+ mutex_lock(&pool->lock);
+ irq = xa_load(&pool->irqs, vecidx);
+ if (irq) {
+ mlx5_irq_get_locked(irq);
+ goto unlock;
+ }
+ irq = mlx5_irq_alloc(pool, vecidx, affinity);
+unlock:
+ mutex_unlock(&pool->lock);
+ return irq;
+}
+
+static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->sf_ctrl_pool;
+}
+
+static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->sf_comp_pool;
+}
+
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
+
+ if (mlx5_core_is_sf(dev))
+ pool = sf_irq_pool_get(irq_table);
+
+	/* In some configurations there is no pool of SF IRQs. Hence, return
+	 * the PF IRQ pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
+}
+
+static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
+
+ if (mlx5_core_is_sf(dev))
+ pool = sf_ctrl_irq_pool_get(irq_table);
+
+	/* In some configurations there is no pool of SF IRQs. Hence, return
+	 * the PF IRQ pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
+}
+
+/**
+ * mlx5_irqs_release - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
+{
+ int i;
+
+ for (i = 0; i < nirqs; i++) {
+ synchronize_irq(irqs[i]->irqn);
+ mlx5_irq_put(irqs[i]);
+ }
+}
+
+/**
+ * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
+ * @ctrl_irq: ctrl IRQ to be released.
+ */
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
+{
+ mlx5_irqs_release(&ctrl_irq, 1);
+}
+
+/**
+ * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ cpumask_copy(req_mask, cpu_online_mask);
+ if (!mlx5_irq_pool_is_sf_pool(pool)) {
+ /* In case we are allocating a control IRQ for PF/VF */
+ if (!pool->xa_num_irqs.max) {
+ cpumask_clear(req_mask);
+ /* In case we only have a single IRQ for PF/VF */
+ cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
+ }
+ /* Allocate the IRQ in the last index of the pool */
+ irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+ } else {
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ }
+
+ free_cpumask_var(req_mask);
+ return irq;
+}
+
+/**
+ * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
+ * @dev: mlx5 device that is requesting the IRQ.
+ * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
+ * provided.
+ * @affinity: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
+ struct cpumask *affinity)
+{
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool;
+ struct mlx5_irq *irq;
+
+ pool = irq_table->pf_pool;
+ irq = irq_pool_request_vector(pool, vecidx, affinity);
+ if (IS_ERR(irq))
+ return irq;
+ mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ irq->irqn, cpumask_pr_args(affinity),
+ irq->refcount / MLX5_EQ_REFS_PER_IRQ);
+ return irq;
+}
+
+/**
+ * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
+{
+ mlx5_irqs_release(irqs, nirqs);
+}
+
+/**
+ * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @cpus: CPUs array for binding the IRQs
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bound to at most 1 CPU.
+ * This function requests @nirqs IRQs, starting from vector index 0.
+ *
+ * This function returns the number of IRQs requested (which might be smaller
+ * than @nirqs) on success, or a negative error code in case of an error.
+ */
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ for (i = 0; i < nirqs; i++) {
+ cpumask_set_cpu(cpus[i], req_mask);
+ irq = mlx5_irq_request(dev, i, req_mask);
+ if (IS_ERR(irq))
+ break;
+ cpumask_clear(req_mask);
+ irqs[i] = irq;
+ }
+
+ free_cpumask_var(req_mask);
+ return i ? i : PTR_ERR(irq);
+}
+
+static struct mlx5_irq_pool *
+irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
+ u32 min_threshold, u32 max_threshold)
+{
+ struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+ pool->dev = dev;
+ mutex_init(&pool->lock);
+ xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
+ pool->xa_num_irqs.min = start;
+ pool->xa_num_irqs.max = start + size - 1;
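+	/* Leave room for the per-vector index suffix appended to the name */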
+ if (name)
+ snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
+ "%s", name);
+ pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
+ pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
+ mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
+ name, size, start);
+ return pool;
+}
+
+static void irq_pool_free(struct mlx5_irq_pool *pool)
+{
+ struct mlx5_irq *irq;
+ unsigned long index;
+
+	/* There are cases in which we are destroying the irq_table before
+	 * freeing all the IRQs, for example during fast teardown. Hence, free
+	 * the IRQs which might not have been freed yet.
+ */
+ xa_for_each(&pool->irqs, index, irq)
+ irq_release(irq);
+ xa_destroy(&pool->irqs);
+ mutex_destroy(&pool->lock);
+ kfree(pool->irqs_per_cpu);
+ kvfree(pool);
+}
+
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int num_sf_ctrl_by_msix;
+ int num_sf_ctrl_by_sfs;
+ int num_sf_ctrl;
+ int err;
+
+ /* init pf_pool */
+ table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
+ if (IS_ERR(table->pf_pool))
+ return PTR_ERR(table->pf_pool);
+ if (!mlx5_sf_max_functions(dev))
+ return 0;
+ if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
+		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SFs may run at lower performance\n");
+ return 0;
+ }
+
+ /* init sf_ctrl_pool */
+ num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
+ num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
+ MLX5_SFS_PER_CTRL_IRQ);
+ num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
+ num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
+ table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
+ "mlx5_sf_ctrl",
+ MLX5_EQ_SHARE_IRQ_MIN_CTRL,
+ MLX5_EQ_SHARE_IRQ_MAX_CTRL);
+ if (IS_ERR(table->sf_ctrl_pool)) {
+ err = PTR_ERR(table->sf_ctrl_pool);
+ goto err_pf;
+ }
+ /* init sf_comp_pool */
+ table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
+ sf_vec - num_sf_ctrl, "mlx5_sf_comp",
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
+ if (IS_ERR(table->sf_comp_pool)) {
+ err = PTR_ERR(table->sf_comp_pool);
+ goto err_sf_ctrl;
+ }
+
+ table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
+ if (!table->sf_comp_pool->irqs_per_cpu) {
+ err = -ENOMEM;
+ goto err_irqs_per_cpu;
+ }
+
+ return 0;
+
+err_irqs_per_cpu:
+ irq_pool_free(table->sf_comp_pool);
+err_sf_ctrl:
+ irq_pool_free(table->sf_ctrl_pool);
+err_pf:
+ irq_pool_free(table->pf_pool);
+ return err;
+}
+
+static void irq_pools_destroy(struct mlx5_irq_table *table)
+{
+ if (table->sf_ctrl_pool) {
+ irq_pool_free(table->sf_comp_pool);
+ irq_pool_free(table->sf_ctrl_pool);
+ }
+ irq_pool_free(table->pf_pool);
+}
+
+static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
+{
+ struct mlx5_irq *irq;
+ unsigned long index;
+
+ xa_for_each(&pool->irqs, index, irq)
+ mlx5_system_free_irq(irq);
+}
+
+static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
+{
+ if (table->sf_ctrl_pool) {
+ mlx5_irq_pool_free_irqs(table->sf_comp_pool);
+ mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
+ }
+ mlx5_irq_pool_free_irqs(table->pf_pool);
+}
+
+/* irq_table API */
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *irq_table;
+
+ if (mlx5_core_is_sf(dev))
+ return 0;
+
+ irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
+ dev->priv.numa_node);
+ if (!irq_table)
+ return -ENOMEM;
+
+ dev->priv.irq_table = irq_table;
+ return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+ if (mlx5_core_is_sf(dev))
+ return;
+
+ kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
+{
+ if (!table->pf_pool->xa_num_irqs.max)
+ return 1;
+ return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
+}
+
+int mlx5_irq_table_create(struct mlx5_core_dev *dev)
+{
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int total_vec;
+ int pf_vec;
+ int err;
+
+ if (mlx5_core_is_sf(dev))
+ return 0;
+
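+	/* One completion vector per online CPU for each port, plus one vector
+	 * for async/control events.
+	 */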
+ pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
+ pf_vec = min_t(int, pf_vec, num_eqs);
+
+ total_vec = pf_vec;
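+	/* Reserve extra vectors for SF control IRQs and per-SF completion EQs */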
+ if (mlx5_sf_max_functions(dev))
+ total_vec += MLX5_IRQ_CTRL_SF_MAX +
+ MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
+
+ total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
+ if (total_vec < 0)
+ return total_vec;
+ pf_vec = min(pf_vec, total_vec);
+
+ err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
+ if (err)
+ pci_free_irq_vectors(dev->pdev);
+
+ return err;
+}
+
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+
+ if (mlx5_core_is_sf(dev))
+ return;
+
+	/* There are cases where IRQs are still in use when we reach this point.
+	 * Hence, make sure all the IRQs are released.
+ */
+ irq_pools_destroy(table);
+ pci_free_irq_vectors(dev->pdev);
+}
+
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+
+ if (mlx5_core_is_sf(dev))
+ return;
+
+ mlx5_irq_pools_free_irqs(table);
+ pci_free_irq_vectors(dev->pdev);
+}
+
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
+{
+ if (table->sf_comp_pool)
+ return min_t(int, num_online_cpus(),
+ table->sf_comp_pool->xa_num_irqs.max -
+ table->sf_comp_pool->xa_num_irqs.min + 1);
+ else
+ return mlx5_irq_table_get_num_comp(table);
+}
+
+struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_MLX5_SF
+ if (mlx5_core_is_sf(dev))
+ return dev->priv.parent_mdev->priv.irq_table;
+#endif
+ return dev->priv.irq_table;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
new file mode 100644
index 000000000..404717930
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __PCI_IRQ_H__
+#define __PCI_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s")
+#define MLX5_MAX_IRQ_FORMATTED_NAME \
+ (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR))
+/* max irq_index is 2047, so four chars */
+#define MLX5_MAX_IRQ_IDX_CHARS (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
+
+struct mlx5_irq;
+
+struct mlx5_irq_pool {
+ char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
+ struct xa_limit xa_num_irqs;
+	struct mutex lock; /* sync IRQ creation */
+ struct xarray irqs;
+ u32 max_threshold;
+ u32 min_threshold;
+ u16 *irqs_per_cpu;
+ struct mlx5_core_dev *dev;
+};
+
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
+static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+{
+ return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+}
+
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity);
+int mlx5_irq_get_locked(struct mlx5_irq *irq);
+int mlx5_irq_read_locked(struct mlx5_irq *irq);
+int mlx5_irq_put(struct mlx5_irq *irq);
+
+#endif /* __PCI_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
new file mode 100644
index 000000000..ee5ffdeb9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ err = mlx5_cmd_exec_inout(dev, alloc_pd, in, out);
+ if (!err)
+ *pdn = MLX5_GET(alloc_pd_out, out, pd);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_pd);
+
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
+
+ MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, in, pd, pdn);
+ return mlx5_cmd_exec_in(dev, dealloc_pd, in);
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
new file mode 100644
index 000000000..a1548e6bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -0,0 +1,1056 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/port.h>
+#include "mlx5_core.h"
+
+/* calling with verbose false will not print error to log */
+int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in,
+ void *data_out, int size_out, u16 reg_id, int arg,
+ int write, bool verbose)
+{
+ int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
+ int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
+ int err = -ENOMEM;
+ u32 *out = NULL;
+ u32 *in = NULL;
+ void *data;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!in || !out)
+ goto out;
+
+ data = MLX5_ADDR_OF(access_register_in, in, register_data);
+ memcpy(data, data_in, size_in);
+
+ MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+ MLX5_SET(access_register_in, in, op_mod, !write);
+ MLX5_SET(access_register_in, in, argument, arg);
+ MLX5_SET(access_register_in, in, register_id, reg_id);
+
+ err = mlx5_cmd_do(dev, in, inlen, out, outlen);
+ if (verbose)
+ err = mlx5_cmd_check(dev, err, in, out);
+ if (err)
+ goto out;
+
+ data = MLX5_ADDR_OF(access_register_out, out, register_data);
+ memcpy(data_out, data, size_out);
+
+out:
+ kvfree(out);
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_access_reg);
+
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+ int size_in, void *data_out, int size_out,
+ u16 reg_id, int arg, int write)
+{
+ return mlx5_access_reg(dev, data_in, size_in, data_out, size_out,
+ reg_id, arg, write, true);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
+
+int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
+ u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(pcam_reg);
+
+ MLX5_SET(pcam_reg, in, feature_group, feature_group);
+ MLX5_SET(pcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0);
+}
+
+int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group,
+ u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(mcam_reg);
+
+ MLX5_SET(mcam_reg, in, feature_group, feature_group);
+ MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0);
+}
+
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+ u8 feature_group, u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(qcam_reg);
+
+ MLX5_SET(qcam_reg, in, feature_group, feature_group);
+ MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0);
+}
+
+struct mlx5_reg_pcap {
+ u8 rsvd0;
+ u8 port_num;
+ u8 rsvd1[2];
+ __be32 caps_127_96;
+ __be32 caps_95_64;
+ __be32 caps_63_32;
+ __be32 caps_31_0;
+};
+
+int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps)
+{
+ struct mlx5_reg_pcap in;
+ struct mlx5_reg_pcap out;
+
+ memset(&in, 0, sizeof(in));
+ in.caps_127_96 = cpu_to_be32(caps);
+ in.port_num = port_num;
+
+ return mlx5_core_access_reg(dev, &in, sizeof(in), &out,
+ sizeof(out), MLX5_REG_PCAP, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_caps);
+
+int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
+ int ptys_size, int proto_mask, u8 local_port)
+{
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+
+ MLX5_SET(ptys_reg, in, local_port, local_port);
+ MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
+ return mlx5_core_access_reg(dev, in, sizeof(in), ptys,
+ ptys_size, MLX5_REG_PTYS, 0, 0);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_ptys);
+
+int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
+{
+ u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(mlcr_reg)];
+
+ MLX5_SET(mlcr_reg, in, local_port, 1);
+ MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MLCR, 0, 1);
+}
+
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+ u16 *proto_oper, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB,
+ local_port);
+ if (err)
+ return err;
+
+ *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
+ *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_query_ib_port_oper);
+
+/* This function should only be used after setting a port register */
+void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
+{
+ enum mlx5_port_status ps;
+
+ mlx5_query_port_admin_status(dev, &ps);
+ mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN);
+ if (ps == MLX5_PORT_UP)
+ mlx5_set_port_admin_status(dev, MLX5_PORT_UP);
+}
+EXPORT_SYMBOL_GPL(mlx5_toggle_port_link);
+
+int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
+ enum mlx5_port_status status)
+{
+ u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(paos_reg)];
+
+ MLX5_SET(paos_reg, in, local_port, 1);
+ MLX5_SET(paos_reg, in, admin_status, status);
+ MLX5_SET(paos_reg, in, ase, 1);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PAOS, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status);
+
+int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
+ enum mlx5_port_status *status)
+{
+ u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(paos_reg)];
+ int err;
+
+ MLX5_SET(paos_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PAOS, 0, 0);
+ if (err)
+ return err;
+ *status = MLX5_GET(paos_reg, out, admin_status);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status);
+
+static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
+ u16 *max_mtu, u16 *oper_mtu, u8 port)
+{
+ u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+
+ MLX5_SET(pmtu_reg, in, local_port, port);
+ mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PMTU, 0, 0);
+
+ if (max_mtu)
+ *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu);
+ if (oper_mtu)
+ *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu);
+ if (admin_mtu)
+ *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu);
+}
+
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port)
+{
+ u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+
+ MLX5_SET(pmtu_reg, in, admin_mtu, mtu);
+ MLX5_SET(pmtu_reg, in, local_port, port);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PMTU, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_mtu);
+
+void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu,
+ u8 port)
+{
+ mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu);
+
+void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
+ u8 port)
+{
+ mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu);
+
+static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
+{
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)];
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return err;
+
+ *module_num = MLX5_GET(lane_2_module_mapping,
+ MLX5_ADDR_OF(pmlp_reg, out, lane0_module_mapping),
+ module);
+
+ return 0;
+}
+
+static int mlx5_query_module_id(struct mlx5_core_dev *dev, int module_num,
+ u8 *module_id)
+{
+ u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {};
+ u32 out[MLX5_ST_SZ_DW(mcia_reg)];
+ int err, status;
+ u8 *ptr;
+
+ MLX5_SET(mcia_reg, in, i2c_device_address, MLX5_I2C_ADDR_LOW);
+ MLX5_SET(mcia_reg, in, module, module_num);
+ MLX5_SET(mcia_reg, in, device_address, 0);
+ MLX5_SET(mcia_reg, in, page_number, 0);
+ MLX5_SET(mcia_reg, in, size, 1);
+ MLX5_SET(mcia_reg, in, l, 0);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCIA, 0, 0);
+ if (err)
+ return err;
+
+ status = MLX5_GET(mcia_reg, out, status);
+ if (status) {
+ mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n",
+ status);
+ return -EIO;
+ }
+ ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
+
+ *module_id = ptr[0];
+
+ return 0;
+}
+
+static int mlx5_qsfp_eeprom_page(u16 offset)
+{
+ if (offset < MLX5_EEPROM_PAGE_LENGTH)
+ /* Addresses between 0-255 - page 00 */
+ return 0;
+
+	/* Addresses between 256 and 639 belong to pages 01, 02 and 03.
+ * For example, offset = 400 belongs to page 02:
+ * 1 + ((400 - 256)/128) = 2
+ */
+ return 1 + ((offset - MLX5_EEPROM_PAGE_LENGTH) /
+ MLX5_EEPROM_HIGH_PAGE_LENGTH);
+}
+
+static int mlx5_qsfp_eeprom_high_page_offset(int page_num)
+{
+	if (!page_num)		/* Page 0 always starts from the low page */
+ return 0;
+
+ /* High page */
+ return page_num * MLX5_EEPROM_HIGH_PAGE_LENGTH;
+}
+
+static void mlx5_qsfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset)
+{
+ *i2c_addr = MLX5_I2C_ADDR_LOW;
+ *page_num = mlx5_qsfp_eeprom_page(*offset);
+ *offset -= mlx5_qsfp_eeprom_high_page_offset(*page_num);
+}
+
+static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset)
+{
+ *i2c_addr = MLX5_I2C_ADDR_LOW;
+ *page_num = 0;
+
+ if (*offset < MLX5_EEPROM_PAGE_LENGTH)
+ return;
+
+ *i2c_addr = MLX5_I2C_ADDR_HIGH;
+ *offset -= MLX5_EEPROM_PAGE_LENGTH;
+}
+
+static int mlx5_mcia_max_bytes(struct mlx5_core_dev *dev)
+{
+ /* mcia supports either 12 dwords or 32 dwords */
+ return (MLX5_CAP_MCAM_FEATURE(dev, mcia_32dwords) ? 32 : 12) * sizeof(u32);
+}
+
+static int mlx5_query_mcia(struct mlx5_core_dev *dev,
+ struct mlx5_module_eeprom_query_params *params, u8 *data)
+{
+ u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {};
+ u32 out[MLX5_ST_SZ_DW(mcia_reg)];
+ int status, err;
+ void *ptr;
+ u16 size;
+
+ size = min_t(int, params->size, mlx5_mcia_max_bytes(dev));
+
+ MLX5_SET(mcia_reg, in, l, 0);
+ MLX5_SET(mcia_reg, in, size, size);
+ MLX5_SET(mcia_reg, in, module, params->module_number);
+ MLX5_SET(mcia_reg, in, device_address, params->offset);
+ MLX5_SET(mcia_reg, in, page_number, params->page);
+ MLX5_SET(mcia_reg, in, i2c_device_address, params->i2c_address);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCIA, 0, 0);
+ if (err)
+ return err;
+
+ status = MLX5_GET(mcia_reg, out, status);
+ if (status) {
+ mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n",
+ status);
+ return -EIO;
+ }
+
+ ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
+ memcpy(data, ptr, size);
+
+ return size;
+}
+
+int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
+ u16 offset, u16 size, u8 *data)
+{
+ struct mlx5_module_eeprom_query_params query = {0};
+ u8 module_id;
+ int err;
+
+ err = mlx5_query_module_num(dev, &query.module_number);
+ if (err)
+ return err;
+
+ err = mlx5_query_module_id(dev, query.module_number, &module_id);
+ if (err)
+ return err;
+
+ switch (module_id) {
+ case MLX5_MODULE_ID_SFP:
+ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ case MLX5_MODULE_ID_QSFP:
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ default:
+ mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+ return -EINVAL;
+ }
+
+ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
+		/* Cross-page read: read only up to offset 256 in the low page */
+ size = MLX5_EEPROM_PAGE_LENGTH - offset;
+
+ query.size = size;
+ query.offset = offset;
+
+ return mlx5_query_mcia(dev, &query, data);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom);
+
+int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev,
+ struct mlx5_module_eeprom_query_params *params,
+ u8 *data)
+{
+ int err;
+
+ err = mlx5_query_module_num(dev, &params->module_number);
+ if (err)
+ return err;
+
+ if (params->i2c_address != MLX5_I2C_ADDR_HIGH &&
+ params->i2c_address != MLX5_I2C_ADDR_LOW) {
+ mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address);
+ return -EINVAL;
+ }
+
+ return mlx5_query_mcia(dev, params, data);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page);
+
+static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc,
+ int pvlc_size, u8 local_port)
+{
+ u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0};
+
+ MLX5_SET(pvlc_reg, in, local_port, local_port);
+ return mlx5_core_access_reg(dev, in, sizeof(in), pvlc,
+ pvlc_size, MLX5_REG_PVLC, 0, 0);
+}
+
+int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
+ u8 *vl_hw_cap, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(pvlc_reg)];
+ int err;
+
+ err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port);
+ if (err)
+ return err;
+
+ *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap);
+
+static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
+ u32 out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ out_size, MLX5_REG_PFCC, 0, 0);
+}
+
+int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pptx, tx_pause);
+ MLX5_SET(pfcc_reg, in, pprx, rx_pause);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_pause);
+
+int mlx5_query_port_pause(struct mlx5_core_dev *dev,
+ u32 *rx_pause, u32 *tx_pause)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (rx_pause)
+ *rx_pause = MLX5_GET(pfcc_reg, out, pprx);
+
+ if (tx_pause)
+ *tx_pause = MLX5_GET(pfcc_reg, out, pptx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
+
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 stall_critical_watermark,
+ u16 stall_minor_watermark)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pptx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, pprx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, ppan_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, critical_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, minor_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, device_stall_critical_watermark,
+ stall_critical_watermark);
+ MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 *stall_critical_watermark,
+ u16 *stall_minor_watermark)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (stall_critical_watermark)
+ *stall_critical_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_critical_watermark);
+
+ if (stall_minor_watermark)
+ *stall_minor_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_minor_watermark);
+
+ return 0;
+}
+
+int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx);
+ MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx);
+ MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx);
+ MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
+
+int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (pfc_en_tx)
+ *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx);
+
+ if (pfc_en_rx)
+ *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
+
+int mlx5_max_tc(struct mlx5_core_dev *mdev)
+{
+ u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
+
+ return num_tc - 1;
+}
+
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0};
+
+ MLX5_SET(dcbx_param, in, port_number, 1);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0);
+}
+
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in)
+{
+ u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+ MLX5_SET(dcbx_param, in, port_number, 1);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(out), out,
+ sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1);
+}
+
+int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
+{
+ u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+ int err;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ if (prio_tc[i] > mlx5_max_tc(mdev))
+ return -EINVAL;
+
+ MLX5_SET(qtct_reg, in, prio, i);
+ MLX5_SET(qtct_reg, in, tclass, prio_tc[i]);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QTCT, 0, 1);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
+
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+ u8 prio, u8 *tc)
+{
+ u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+ u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(qtct_reg, in, port_number, 1);
+ MLX5_SET(qtct_reg, in, prio, prio);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QTCT, 0, 0);
+ if (!err)
+ *tc = MLX5_GET(qtct_reg, out, tclass);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
+
+static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out),
+ MLX5_REG_QETCR, 0, 1);
+}
+
+static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ return -EOPNOTSUPP;
+
+ memset(in, 0, sizeof(in));
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen,
+ MLX5_REG_QETCR, 0, 0);
+}
+
+int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ int i;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1);
+ MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group);
+
+int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev,
+ u8 tc, u8 *tc_group)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+ tc_configuration[tc]);
+
+ *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ group);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group);
+
+int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ int i;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1);
+ MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc);
+
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+ u8 tc, u8 *bw_pct)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+ tc_configuration[tc]);
+
+ *bw_pct = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ bw_allocation);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc);
+
+int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+ u8 *max_bw_value,
+ u8 *max_bw_units)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ void *ets_tcn_conf;
+ int i;
+
+ MLX5_SET(qetc_reg, in, port_number, 1);
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]);
+
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1);
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units,
+ max_bw_units[i]);
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value,
+ max_bw_value[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit);
+
+int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+ u8 *max_bw_value,
+ u8 *max_bw_units)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+ int i;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]);
+
+ max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ max_bw_value);
+ max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ max_bw_units);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit);
+
+int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
+{
+ u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {};
+
+ MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
+ MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
+ MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
+ return mlx5_cmd_exec_in(mdev, set_wol_rol, in);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
+
+int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
+{
+ u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {};
+ int err;
+
+ MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
+ err = mlx5_cmd_exec_inout(mdev, query_wol_rol, in, out);
+ if (!err)
+ *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
+
+int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ outlen, MLX5_REG_PCMR, 0, 0);
+}
+
+int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+
+ return mlx5_core_access_reg(mdev, in, inlen, out,
+ sizeof(out), MLX5_REG_PCMR, 0, 1);
+}
+
+int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+ int err;
+
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, fcs_chk, enable);
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
+ bool *enabled)
+{
+ u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+	/* Default values for FW that does not support MLX5_REG_PCMR */
+ *supported = false;
+ *enabled = true;
+
+ if (!MLX5_CAP_GEN(mdev, ports_check))
+ return;
+
+ if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+ return;
+
+ *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap));
+ *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
+}
+
+int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
+{
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps,
+ mtpps_size, MLX5_REG_MTPPS, 0, 0);
+}
+
+int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out,
+ sizeof(out), MLX5_REG_MTPPS, 0, 1);
+}
+
+int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode)
+{
+ u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ int err = 0;
+
+ MLX5_SET(mtppse_reg, in, pin, pin);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MTPPSE, 0, 0);
+ if (err)
+ return err;
+
+ *arm = MLX5_GET(mtppse_reg, in, event_arm);
+ *mode = MLX5_GET(mtppse_reg, in, event_generation_mode);
+
+ return err;
+}
+
+int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode)
+{
+ u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+
+ MLX5_SET(mtppse_reg, in, pin, pin);
+ MLX5_SET(mtppse_reg, in, event_arm, arm);
+ MLX5_SET(mtppse_reg, in, event_generation_mode, mode);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MTPPSE, 0, 1);
+}
+
+int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state)
+{
+ u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ int err;
+
+ MLX5_SET(qpts_reg, in, local_port, 1);
+ MLX5_SET(qpts_reg, in, trust_state, trust_state);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QPTS, 0, 1);
+ return err;
+}
+
+int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state)
+{
+ u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ int err;
+
+ MLX5_SET(qpts_reg, in, local_port, 1);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QPTS, 0, 0);
+ if (!err)
+ *trust_state = MLX5_GET(qpts_reg, out, trust_state);
+
+ return err;
+}
+
+int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio)
+{
+ int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+ void *qpdpm_dscp;
+ void *out;
+ void *in;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+ if (err)
+ goto out;
+
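+ /* Start from the current QPDPM mapping and update only the requested DSCP entry */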
+ memcpy(in, out, sz);
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+
+ /* Update the corresponding dscp entry */
+ qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]);
+ MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio);
+ MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+/* dscp2prio[i]: the priority that DSCP value i is mapped to */
+#define MLX5E_SUPPORTED_DSCP 64
+int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio)
+{
+ int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+ void *qpdpm_dscp;
+ void *out;
+ void *in;
+ int err;
+ int i;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+ if (err)
+ goto out;
+
+ for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) {
+ qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]);
+ dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio);
+ }
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
new file mode 100644
index 000000000..8bce730b5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "qos.h"
+
+#define MLX5_QOS_DEFAULT_DWRR_UID 0
+
+bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev)
+{
+ if (!MLX5_CAP_GEN(mdev, qos))
+ return false;
+ if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling))
+ return false;
+ if (!MLX5_CAP_QOS(mdev, nic_bw_share))
+ return false;
+ if (!MLX5_CAP_QOS(mdev, nic_rate_limit))
+ return false;
+ return true;
+}
+
+int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+ return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group);
+}
+
+int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+
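+ /* A leaf is a QUEUE_GROUP scheduling element attached under parent_id */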
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+ return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+ sched_ctx, id);
+}
+
+int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ void *attr;
+
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
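+ /* An inner node is a TSAR element arbitrating in DWRR mode */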
+ attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
+
+ return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+ sched_ctx, id);
+}
+
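+/* The root is a DWRR inner node attached under the default parent (element 0) */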
+int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id)
+{
+ return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id);
+}
+
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev,
+ u32 bw_share, u32 max_avg_bw, u32 id)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ u32 bitmask = 0;
+
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+
+ return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+ sched_ctx, id, bitmask);
+}
+
+int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id)
+{
+ return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h
new file mode 100644
index 000000000..624ce822b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_QOS_H
+#define __MLX5_QOS_H
+
+#include "mlx5_core.h"
+
+#define MLX5_DEBUG_QOS_MASK BIT(4)
+
+#define qos_err(mdev, fmt, ...) \
+ mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__)
+#define qos_warn(mdev, fmt, ...) \
+ mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__)
+#define qos_dbg(mdev, fmt, ...) \
+ mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__)
+
+bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev);
+int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+
+int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id);
+int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+ u32 bw_share, u32 max_avg_bw, u32 *id);
+int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id);
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 bw_share,
+ u32 max_avg_bw, u32 id);
+int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
new file mode 100644
index 000000000..540cf05f6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include <rdma/ib_verbs.h>
+#include <net/addrconf.h>
+
+#include "lib/mlx5.h"
+#include "eswitch.h"
+#include "fs_core.h"
+#include "rdma.h"
+
+static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_roce *roce = &dev->priv.roce;
+
+ mlx5_del_flow_rules(roce->allow_rule);
+ mlx5_destroy_flow_group(roce->fg);
+ mlx5_destroy_flow_table(roce->ft);
+}
+
+static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_roce *roce = &dev->priv.roce;
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ void *match_criteria;
+ u32 *flow_group_in;
+ void *misc;
+ int err;
+
+ if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
+ MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)))
+ return -EOPNOTSUPP;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ kvfree(flow_group_in);
+ return -ENOMEM;
+ }
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL);
+ if (!ns) {
+ mlx5_core_err(dev, "Failed to get RDMA RX namespace");
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ ft_attr.max_fte = 1;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ mlx5_core_err(dev, "Failed to create RDMA RX flow table");
+ err = PTR_ERR(ft);
+ goto free;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+
+ fg = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err);
+ goto destroy_flow_table;
+ }
+
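+ /* The allow rule matches only packets whose source vport is the e-switch manager */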
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port,
+ dev->priv.eswitch->manager_vport);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n",
+ err);
+ goto destroy_flow_group;
+ }
+
+ kvfree(spec);
+ kvfree(flow_group_in);
+ roce->ft = ft;
+ roce->fg = fg;
+ roce->allow_rule = flow_rule;
+
+ return 0;
+
+destroy_flow_group:
+ mlx5_destroy_flow_group(fg);
+destroy_flow_table:
+ mlx5_destroy_flow_table(ft);
+free:
+ kvfree(spec);
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
+{
+ mlx5_core_roce_gid_set(dev, 0, 0, 0,
+ NULL, NULL, false, 0, 1);
+}
+
+static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid)
+{
+ u8 hw_id[ETH_ALEN];
+
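+ /* Build a link-local GID (fe80::/64) whose interface id is derived from the MAC */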
+ mlx5_query_mac_address(dev, hw_id);
+ gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ addrconf_addr_eui48(&gid->raw[8], hw_id);
+}
+
+static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
+{
+ union ib_gid gid;
+ u8 mac[ETH_ALEN];
+
+ mlx5_rdma_make_default_gid(dev, &gid);
+ return mlx5_core_roce_gid_set(dev, 0,
+ MLX5_ROCE_VERSION_1,
+ 0, gid.raw, mac,
+ false, 0, 1);
+}
+
+void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_roce *roce = &dev->priv.roce;
+
+ if (!roce->ft)
+ return;
+
+ mlx5_rdma_disable_roce_steering(dev);
+ mlx5_rdma_del_roce_addr(dev);
+ mlx5_nic_vport_disable_roce(dev);
+}
+
+void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, roce))
+ return;
+
+ err = mlx5_nic_vport_enable_roce(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
+ return;
+ }
+
+ err = mlx5_rdma_add_roce_addr(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err);
+ goto disable_roce;
+ }
+
+ err = mlx5_rdma_enable_roce_steering(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err);
+ goto del_roce_addr;
+ }
+
+ return;
+
+del_roce_addr:
+ mlx5_rdma_del_roce_addr(dev);
+disable_roce:
+ mlx5_nic_vport_disable_roce(dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
new file mode 100644
index 000000000..750cff2a7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_RDMA_H__
+#define __MLX5_RDMA_H__
+
+#include "mlx5_core.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);
+void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {}
+static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_RDMA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
new file mode 100644
index 000000000..9f8b4005f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+/* Scheduling element fw management */
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *ctx, u32 *element_id)
+{
+ u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {};
+ void *schedc;
+ int err;
+
+ schedc = MLX5_ADDR_OF(create_scheduling_element_in, in,
+ scheduling_context);
+ MLX5_SET(create_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT);
+ MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+ memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+ err = mlx5_cmd_exec_inout(dev, create_scheduling_element, in, out);
+ if (err)
+ return err;
+
+ *element_id = MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ return 0;
+}
+
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *ctx, u32 element_id,
+ u32 modify_bitmask)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {};
+ void *schedc;
+
+ schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in,
+ scheduling_context);
+ MLX5_SET(modify_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT);
+ MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id,
+ element_id);
+ MLX5_SET(modify_scheduling_element_in, in, modify_bitmask,
+ modify_bitmask);
+ MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+ memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+ return mlx5_cmd_exec_in(dev, modify_scheduling_element, in);
+}
+
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ u32 element_id)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {};
+
+ MLX5_SET(destroy_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+ MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id,
+ element_id);
+ MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+
+ return mlx5_cmd_exec_in(dev, destroy_scheduling_element, in);
+}
+
+static bool mlx5_rl_are_equal_raw(struct mlx5_rl_entry *entry, void *rl_in,
+ u16 uid)
+{
+ return (!memcmp(entry->rl_raw, rl_in, sizeof(entry->rl_raw)) &&
+ entry->uid == uid);
+}
+
+/* Find an entry where the given rate can be registered.
+ * If the rate is already registered, return the entry holding it;
+ * otherwise return the first available entry.
+ * If the table is full, return NULL.
+ */
+static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
+ void *rl_in, u16 uid, bool dedicated)
+{
+ struct mlx5_rl_entry *ret_entry = NULL;
+ bool empty_found = false;
+ int i;
+
+ lockdep_assert_held(&table->rl_lock);
+ WARN_ON(!table->rl_entry);
+
+ for (i = 0; i < table->max_size; i++) {
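+ /* A dedicated entry only needs to be unused; its current rate is irrelevant */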
+ if (dedicated) {
+ if (!table->rl_entry[i].refcount)
+ return &table->rl_entry[i];
+ continue;
+ }
+
+ if (table->rl_entry[i].refcount) {
+ if (table->rl_entry[i].dedicated)
+ continue;
+ if (mlx5_rl_are_equal_raw(&table->rl_entry[i], rl_in,
+ uid))
+ return &table->rl_entry[i];
+ } else if (!empty_found) {
+ empty_found = true;
+ ret_entry = &table->rl_entry[i];
+ }
+ }
+
+ return ret_entry;
+}
+
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_rl_entry *entry, bool set)
+{
+ u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {};
+ void *pp_context;
+
+ pp_context = MLX5_ADDR_OF(set_pp_rate_limit_in, in, ctx);
+ MLX5_SET(set_pp_rate_limit_in, in, opcode,
+ MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+ MLX5_SET(set_pp_rate_limit_in, in, uid, entry->uid);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, entry->index);
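+ /* When clearing (!set), the rate limit context is left zeroed */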
+ if (set)
+ memcpy(pp_context, entry->rl_raw, sizeof(entry->rl_raw));
+ return mlx5_cmd_exec_in(dev, set_pp_rate_limit, in);
+}
+
+bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+
+ return (rate <= table->max_rate && rate >= table->min_rate);
+}
+EXPORT_SYMBOL(mlx5_rl_is_in_range);
+
+bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
+ struct mlx5_rate_limit *rl_1)
+{
+ return ((rl_0->rate == rl_1->rate) &&
+ (rl_0->max_burst_sz == rl_1->max_burst_sz) &&
+ (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz));
+}
+EXPORT_SYMBOL(mlx5_rl_are_equal);
+
+static int mlx5_rl_table_get(struct mlx5_rl_table *table)
+{
+ int i;
+
+ lockdep_assert_held(&table->rl_lock);
+
+ if (table->rl_entry) {
+ table->refcount++;
+ return 0;
+ }
+
+ table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
+ GFP_KERNEL);
+ if (!table->rl_entry)
+ return -ENOMEM;
+
+ /* The index represents the index in the HW rate limit table.
+ * Index 0 is reserved for the unlimited rate.
+ */
+ for (i = 0; i < table->max_size; i++)
+ table->rl_entry[i].index = i + 1;
+
+ table->refcount++;
+ return 0;
+}
+
+static void mlx5_rl_table_put(struct mlx5_rl_table *table)
+{
+ lockdep_assert_held(&table->rl_lock);
+ if (--table->refcount)
+ return;
+
+ kfree(table->rl_entry);
+ table->rl_entry = NULL;
+}
+
+static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *table)
+{
+ int i;
+
+ if (!table->rl_entry)
+ return;
+
+ /* Clear all configured rates */
+ for (i = 0; i < table->max_size; i++)
+ if (table->rl_entry[i].refcount)
+ mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], false);
+ kfree(table->rl_entry);
+}
+
+static void mlx5_rl_entry_get(struct mlx5_rl_entry *entry)
+{
+ entry->refcount++;
+}
+
+static void
+mlx5_rl_entry_put(struct mlx5_core_dev *dev, struct mlx5_rl_entry *entry)
+{
+ entry->refcount--;
+ if (!entry->refcount)
+ mlx5_set_pp_rate_limit_cmd(dev, entry, false);
+}
+
+int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
+ bool dedicated_entry, u16 *index)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry;
+ u32 rate;
+ int err;
+
+ if (!table->max_size)
+ return -EOPNOTSUPP;
+
+ rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit);
+ if (!rate || !mlx5_rl_is_in_range(dev, rate)) {
+ mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
+ rate, table->min_rate, table->max_rate);
+ return -EINVAL;
+ }
+
+ mutex_lock(&table->rl_lock);
+ err = mlx5_rl_table_get(table);
+ if (err)
+ goto out;
+
+ entry = find_rl_entry(table, rl_in, uid, dedicated_entry);
+ if (!entry) {
+ mlx5_core_err(dev, "Max number of %u rates reached\n",
+ table->max_size);
+ err = -ENOSPC;
+ goto rl_err;
+ }
+ if (!entry->refcount) {
+ /* new rate limit */
+ memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw));
+ entry->uid = uid;
+ err = mlx5_set_pp_rate_limit_cmd(dev, entry, true);
+ if (err) {
+ mlx5_core_err(
+ dev,
+ "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, rate,
+ MLX5_GET(set_pp_rate_limit_context, rl_in,
+ burst_upper_bound),
+ MLX5_GET(set_pp_rate_limit_context, rl_in,
+ typical_packet_size));
+ goto rl_err;
+ }
+
+ entry->dedicated = dedicated_entry;
+ }
+ mlx5_rl_entry_get(entry);
+ *index = entry->index;
+ mutex_unlock(&table->rl_lock);
+ return 0;
+
+rl_err:
+ mlx5_rl_table_put(table);
+out:
+ mutex_unlock(&table->rl_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_rl_add_rate_raw);
+
+void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry;
+
+ mutex_lock(&table->rl_lock);
+ entry = &table->rl_entry[index - 1];
+ mlx5_rl_entry_put(dev, entry);
+ mlx5_rl_table_put(table);
+ mutex_unlock(&table->rl_lock);
+}
+EXPORT_SYMBOL(mlx5_rl_remove_rate_raw);
+
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+ struct mlx5_rate_limit *rl)
+{
+ u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {};
+
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound,
+ rl->max_burst_sz);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size,
+ rl->typical_pkt_sz);
+
+ return mlx5_rl_add_rate_raw(dev, rl_raw,
+ MLX5_CAP_QOS(dev, packet_pacing_uid) ?
+ MLX5_SHARED_RESOURCE_UID : 0,
+ false, index);
+}
+EXPORT_SYMBOL(mlx5_rl_add_rate);
+
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
+{
+ u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {};
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry = NULL;
+
+ /* 0 is a reserved value for unlimited rate */
+ if (rl->rate == 0)
+ return;
+
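+ /* Rebuild the raw rate limit context to locate the matching table entry */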
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound,
+ rl->max_burst_sz);
+ MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size,
+ rl->typical_pkt_sz);
+
+ mutex_lock(&table->rl_lock);
+ entry = find_rl_entry(table, rl_raw,
+ MLX5_CAP_QOS(dev, packet_pacing_uid) ?
+ MLX5_SHARED_RESOURCE_UID : 0, false);
+ if (!entry || !entry->refcount) {
+ mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n",
+ rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
+ goto out;
+ }
+ mlx5_rl_entry_put(dev, entry);
+ mlx5_rl_table_put(table);
+out:
+ mutex_unlock(&table->rl_lock);
+}
+EXPORT_SYMBOL(mlx5_rl_remove_rate);
+
+int mlx5_init_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
+ table->max_size = 0;
+ return 0;
+ }
+
+ mutex_init(&table->rl_lock);
+
+ /* First entry is reserved for unlimited rate */
+ table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1;
+ table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
+ table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
+
+ mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n",
+ table->max_size,
+ table->min_rate >> 10,
+ table->max_rate >> 10);
+
+ return 0;
+}
+
+void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing))
+ return;
+
+ mlx5_rl_table_free(dev, table);
+ mutex_destroy(&table->rl_lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c
new file mode 100644
index 000000000..a8d75c2f0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include "priv.h"
+
+int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_sf_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_sf_in)] = {};
+
+ MLX5_SET(alloc_sf_in, in, opcode, MLX5_CMD_OP_ALLOC_SF);
+ MLX5_SET(alloc_sf_in, in, function_id, function_id);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_sf_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_sf_in)] = {};
+
+ MLX5_SET(dealloc_sf_in, in, opcode, MLX5_CMD_OP_DEALLOC_SF);
+ MLX5_SET(dealloc_sf_in, in, function_id, function_id);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
+
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, func_id);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+}
+
+int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
+
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, func_id);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
new file mode 100644
index 000000000..8e2abbab0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include "mlx5_core.h"
+#include "dev.h"
+#include "sf/vhca_event.h"
+#include "sf/sf.h"
+#include "sf/mlx5_ifc_vhca_event.h"
+#include "ecpf.h"
+#define CREATE_TRACE_POINTS
+#include "diag/dev_tracepoint.h"
+
+struct mlx5_sf_dev_table {
+ struct xarray devices;
+ unsigned int max_sfs;
+ phys_addr_t base_address;
+ u64 sf_bar_length;
+ struct notifier_block nb;
+ struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */
+ struct workqueue_struct *active_wq;
+ struct work_struct work;
+ u8 stop_active_wq:1;
+ struct mlx5_core_dev *dev;
+};
+
+static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev);
+}
+
+bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+
+ return table && !xa_empty(&table->devices);
+}
+
+static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+
+ return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
+}
+static DEVICE_ATTR_RO(sfnum);
+
+static struct attribute *sf_device_attrs[] = {
+ &dev_attr_sfnum.attr,
+ NULL,
+};
+
+static const struct attribute_group sf_attr_group = {
+ .attrs = sf_device_attrs,
+};
+
+static const struct attribute_group *sf_attr_groups[2] = {
+ &sf_attr_group,
+ NULL
+};
+
+static void mlx5_sf_dev_release(struct device *device)
+{
+ struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev);
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+
+ mlx5_adev_idx_free(adev->id);
+ kfree(sf_dev);
+}
+
+static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev)
+{
+ int id;
+
+ id = sf_dev->adev.id;
+ trace_mlx5_sf_dev_del(dev, sf_dev, id);
+
+ auxiliary_device_delete(&sf_dev->adev);
+ auxiliary_device_uninit(&sf_dev->adev);
+}
+
+static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+ struct mlx5_sf_dev *sf_dev;
+ struct pci_dev *pdev;
+ int err;
+ int id;
+
+ id = mlx5_adev_idx_alloc();
+ if (id < 0) {
+ err = id;
+ goto add_err;
+ }
+
+ sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL);
+ if (!sf_dev) {
+ mlx5_adev_idx_free(id);
+ err = -ENOMEM;
+ goto add_err;
+ }
+ pdev = dev->pdev;
+ sf_dev->adev.id = id;
+ sf_dev->adev.name = MLX5_SF_DEV_ID_NAME;
+ sf_dev->adev.dev.release = mlx5_sf_dev_release;
+ sf_dev->adev.dev.parent = &pdev->dev;
+ sf_dev->adev.dev.groups = sf_attr_groups;
+ sf_dev->sfnum = sfnum;
+ sf_dev->parent_mdev = dev;
+ sf_dev->fn_id = fn_id;
+
+ if (!table->max_sfs) {
+ mlx5_adev_idx_free(id);
+ kfree(sf_dev);
+ err = -EOPNOTSUPP;
+ goto add_err;
+ }
+ sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length);
+
+ trace_mlx5_sf_dev_add(dev, sf_dev, id);
+
+ err = auxiliary_device_init(&sf_dev->adev);
+ if (err) {
+ mlx5_adev_idx_free(id);
+ kfree(sf_dev);
+ goto add_err;
+ }
+
+ err = auxiliary_device_add(&sf_dev->adev);
+ if (err) {
+ put_device(&sf_dev->adev.dev);
+ goto add_err;
+ }
+
+ err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL);
+ if (err)
+ goto xa_err;
+ return;
+
+xa_err:
+ mlx5_sf_dev_remove(dev, sf_dev);
+add_err:
+ mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n",
+ sf_index, sfnum, err);
+}
+
+static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+
+ xa_erase(&table->devices, sf_index);
+ mlx5_sf_dev_remove(dev, sf_dev);
+}
+
+static int
+mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data)
+{
+ struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
+ const struct mlx5_vhca_state_event *event = data;
+ struct mlx5_sf_dev *sf_dev;
+ u16 max_functions;
+ u16 sf_index;
+ u16 base_id;
+
+ max_functions = mlx5_sf_max_functions(table->dev);
+ if (!max_functions)
+ return 0;
+
+ base_id = MLX5_CAP_GEN(table->dev, sf_base_id);
+ if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
+ return 0;
+
+ sf_index = event->function_id - base_id;
+ mutex_lock(&table->table_lock);
+ sf_dev = xa_load(&table->devices, sf_index);
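+ /* Create or destroy the SF auxiliary device to match the new vhca state */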
+ switch (event->new_vhca_state) {
+ case MLX5_VHCA_STATE_INVALID:
+ case MLX5_VHCA_STATE_ALLOCATED:
+ if (sf_dev)
+ mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
+ break;
+ case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
+ if (sf_dev)
+ mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
+ else
+ mlx5_core_err(table->dev,
+ "SF DEV: teardown state for invalid dev index=%d fn_id=0x%x\n",
+ sf_index, event->sw_function_id);
+ break;
+ case MLX5_VHCA_STATE_ACTIVE:
+ if (!sf_dev)
+ mlx5_sf_dev_add(table->dev, sf_index, event->function_id,
+ event->sw_function_id);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&table->table_lock);
+ return 0;
+}
+
+static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
+{
+ struct mlx5_core_dev *dev = table->dev;
+ u16 max_functions;
+ u16 function_id;
+ int err = 0;
+ int i;
+
+ max_functions = mlx5_sf_max_functions(dev);
+ function_id = MLX5_CAP_GEN(dev, sf_base_id);
+ /* Arm the vhca context of each SF function so that vhca state events are generated */
+ for (i = 0; i < max_functions; i++) {
+ err = mlx5_vhca_event_arm(dev, function_id);
+ if (err)
+ return err;
+
+ function_id++;
+ }
+ return 0;
+}
+
+static void mlx5_sf_dev_add_active_work(struct work_struct *work)
+{
+ struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work);
+ u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ struct mlx5_core_dev *dev = table->dev;
+ u16 max_functions;
+ u16 function_id;
+ u16 sw_func_id;
+ int err = 0;
+ u8 state;
+ int i;
+
+ max_functions = mlx5_sf_max_functions(dev);
+ function_id = MLX5_CAP_GEN(dev, sf_base_id);
+ for (i = 0; i < max_functions; i++, function_id++) {
+ if (table->stop_active_wq)
+ return;
+ err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out));
+ if (err)
+ /* A failure of a specific vhca doesn't mean the others
+ * will fail as well.
+ */
+ continue;
+ state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
+ if (state != MLX5_VHCA_STATE_ACTIVE)
+ continue;
+
+ sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id);
+ mutex_lock(&table->table_lock);
+ /* Don't probe a device which is already probed */
+ if (!xa_load(&table->devices, i))
+ mlx5_sf_dev_add(dev, i, function_id, sw_func_id);
+ /* There is a race where the SF goes inactive after the query
+ * above, e.g. the query returns that the state of the SF is
+ * active, and right after that the eswitch manager sets it to
+ * inactive.
+ * This case cannot be managed in SW, since the probing of the
+ * SF is on one system and the inactivation is on a different
+ * system.
+ * If the inactivation happens after the SF performs init_hca(),
+ * the SF will fully probe and then be removed. If it happens
+ * before init_hca(), the SF probe will fail.
+ */
+ mutex_unlock(&table->table_lock);
+ }
+}
+
+/* In case SFs are generated externally, probe active SFs */
+static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table)
+{
+ if (MLX5_CAP_GEN(table->dev, eswitch_manager))
+ return 0; /* the table is local */
+
+ /* Use a workqueue to probe active SFs, which may be numerous
+ * and can take up to minutes to probe.
+ */
+ table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
+ if (!table->active_wq)
+ return -ENOMEM;
+ INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work);
+ queue_work(table->active_wq, &table->work);
+ return 0;
+}
+
+static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table)
+{
+ if (table->active_wq) {
+ table->stop_active_wq = true;
+ destroy_workqueue(table->active_wq);
+ }
+}
+
+void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_dev_table *table;
+ unsigned int max_sfs;
+ int err;
+
+ if (!mlx5_sf_dev_supported(dev) || !mlx5_vhca_event_supported(dev))
+ return;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table) {
+ err = -ENOMEM;
+ goto table_err;
+ }
+
+ table->nb.notifier_call = mlx5_sf_dev_state_change_handler;
+ table->dev = dev;
+ if (MLX5_CAP_GEN(dev, max_num_sf))
+ max_sfs = MLX5_CAP_GEN(dev, max_num_sf);
+ else
+ max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf);
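+ /* Each SF gets a slice of 2^log_min_sf_size 4KB pages carved out of BAR 2 */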
+ table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12);
+ table->base_address = pci_resource_start(dev->pdev, 2);
+ table->max_sfs = max_sfs;
+ xa_init(&table->devices);
+ mutex_init(&table->table_lock);
+ dev->priv.sf_dev_table = table;
+
+ err = mlx5_vhca_event_notifier_register(dev, &table->nb);
+ if (err)
+ goto vhca_err;
+
+ err = mlx5_sf_dev_queue_active_work(table);
+ if (err)
+ goto add_active_err;
+
+ err = mlx5_sf_dev_vhca_arm_all(table);
+ if (err)
+ goto arm_err;
+ mlx5_core_dbg(dev, "SF DEV: max sf devices=%d\n", max_sfs);
+ return;
+
+arm_err:
+ mlx5_sf_dev_destroy_active_work(table);
+add_active_err:
+ mlx5_vhca_event_notifier_unregister(dev, &table->nb);
+vhca_err:
+ table->max_sfs = 0;
+ kfree(table);
+ dev->priv.sf_dev_table = NULL;
+table_err:
+ mlx5_core_err(dev, "SF DEV table create err = %d\n", err);
+}
+
+static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table)
+{
+ struct mlx5_sf_dev *sf_dev;
+ unsigned long index;
+
+ xa_for_each(&table->devices, index, sf_dev) {
+ xa_erase(&table->devices, index);
+ mlx5_sf_dev_remove(table->dev, sf_dev);
+ }
+}
+
+void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+
+ if (!table)
+ return;
+
+ mlx5_sf_dev_destroy_active_work(table);
+ mlx5_vhca_event_notifier_unregister(dev, &table->nb);
+ mutex_destroy(&table->table_lock);
+
+ /* Now that the event handler is not running, it is safe to
+ * destroy the SF devices without racing with it.
+ */
+ mlx5_sf_dev_destroy_all(table);
+
+ WARN_ON(!xa_empty(&table->devices));
+ kfree(table);
+ dev->priv.sf_dev_table = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
new file mode 100644
index 000000000..2a66a427e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_SF_DEV_H__
+#define __MLX5_SF_DEV_H__
+
+#ifdef CONFIG_MLX5_SF
+
+#include <linux/auxiliary_bus.h>
+
+#define MLX5_SF_DEV_ID_NAME "sf"
+
+struct mlx5_sf_dev {
+ struct auxiliary_device adev;
+ struct mlx5_core_dev *parent_mdev;
+ struct mlx5_core_dev *mdev;
+ phys_addr_t bar_base_addr;
+ u32 sfnum;
+ u16 fn_id;
+};
+
+void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev);
+void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_sf_driver_register(void);
+void mlx5_sf_driver_unregister(void);
+
+bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev);
+
+#else
+
+static inline void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
+{
+}
+
+static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
+{
+}
+
+static inline int mlx5_sf_driver_register(void)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_driver_unregister(void)
+{
+}
+
+static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
+{
+ return false;
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h
new file mode 100644
index 000000000..7f7c9af5d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_SF_DEV_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_SF_DEV_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/mlx5/driver.h>
+#include "../../dev/dev.h"
+
+DECLARE_EVENT_CLASS(mlx5_sf_dev_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_sf_dev *sfdev,
+ int aux_id),
+ TP_ARGS(dev, sfdev, aux_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const struct mlx5_sf_dev*, sfdev)
+ __field(int, aux_id)
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->sfdev = sfdev;
+ __entry->aux_id = aux_id;
+ __entry->hw_fn_id = sfdev->fn_id;
+ __entry->sfnum = sfdev->sfnum;
+ ),
+ TP_printk("(%s) sfdev=%pK aux_id=%d hw_id=0x%x sfnum=%u\n",
+ __get_str(devname), __entry->sfdev,
+ __entry->aux_id, __entry->hw_fn_id,
+ __entry->sfnum)
+);
+
+DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_add,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_sf_dev *sfdev,
+ int aux_id),
+ TP_ARGS(dev, sfdev, aux_id)
+ );
+
+DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_del,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_sf_dev *sfdev,
+ int aux_id),
+ TP_ARGS(dev, sfdev, aux_id)
+ );
+
+#endif /* _MLX5_SF_DEV_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH sf/dev/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE dev_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
new file mode 100644
index 000000000..2424cdf9c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include "mlx5_core.h"
+#include "dev.h"
+#include "devlink.h"
+
+static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id)
+{
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+ struct mlx5_core_dev *mdev;
+ struct devlink *devlink;
+ int err;
+
+ devlink = mlx5_devlink_alloc(&adev->dev);
+ if (!devlink)
+ return -ENOMEM;
+
+ mdev = devlink_priv(devlink);
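+ /* The SF reuses the parent PF's PCI device but has its own BAR slice and init segment */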
+ mdev->device = &adev->dev;
+ mdev->pdev = sf_dev->parent_mdev->pdev;
+ mdev->bar_addr = sf_dev->bar_base_addr;
+ mdev->iseg_base = sf_dev->bar_base_addr;
+ mdev->coredev_type = MLX5_COREDEV_SF;
+ mdev->priv.parent_mdev = sf_dev->parent_mdev;
+ mdev->priv.adev_idx = adev->id;
+ sf_dev->mdev = mdev;
+
+ err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err);
+ goto mdev_err;
+ }
+
+ mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg));
+ if (!mdev->iseg) {
+ mlx5_core_warn(mdev, "remap error\n");
+ err = -ENOMEM;
+ goto remap_err;
+ }
+
+ err = mlx5_init_one(mdev);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err);
+ goto init_one_err;
+ }
+ devlink_register(devlink);
+ return 0;
+
+init_one_err:
+ iounmap(mdev->iseg);
+remap_err:
+ mlx5_mdev_uninit(mdev);
+mdev_err:
+ mlx5_devlink_free(devlink);
+ return err;
+}
+
+static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+ struct devlink *devlink = priv_to_devlink(sf_dev->mdev);
+
+ mlx5_drain_health_wq(sf_dev->mdev);
+ devlink_unregister(devlink);
+ mlx5_uninit_one(sf_dev->mdev);
+ iounmap(sf_dev->mdev->iseg);
+ mlx5_mdev_uninit(sf_dev->mdev);
+ mlx5_devlink_free(devlink);
+}
+
+static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
+{
+ struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+
+ mlx5_unload_one(sf_dev->mdev, false);
+}
+
+static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = {
+ { .name = MLX5_ADEV_NAME "." MLX5_SF_DEV_ID_NAME, },
+ { },
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5_sf_dev_id_table);
+
+static struct auxiliary_driver mlx5_sf_driver = {
+ .name = MLX5_SF_DEV_ID_NAME,
+ .probe = mlx5_sf_dev_probe,
+ .remove = mlx5_sf_dev_remove,
+ .shutdown = mlx5_sf_dev_shutdown,
+ .id_table = mlx5_sf_dev_id_table,
+};
+
+int mlx5_sf_driver_register(void)
+{
+ return auxiliary_driver_register(&mlx5_sf_driver);
+}
+
+void mlx5_sf_driver_unregister(void)
+{
+ auxiliary_driver_unregister(&mlx5_sf_driver);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
new file mode 100644
index 000000000..7d955a4d9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include "eswitch.h"
+#include "priv.h"
+#include "sf/dev/dev.h"
+#include "mlx5_ifc_vhca_event.h"
+#include "vhca_event.h"
+#include "ecpf.h"
+#define CREATE_TRACE_POINTS
+#include "diag/sf_tracepoint.h"
+
+struct mlx5_sf {
+ struct devlink_port dl_port;
+ unsigned int port_index;
+ u32 controller;
+ u16 id;
+ u16 hw_fn_id;
+ u16 hw_state;
+};
+
+struct mlx5_sf_table {
+ struct mlx5_core_dev *dev; /* To refer from notifier context. */
+ struct xarray port_indices; /* port index based lookup. */
+ refcount_t refcount;
+ struct completion disable_complete;
+ struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */
+ struct notifier_block esw_nb;
+ struct notifier_block vhca_nb;
+ u8 ecpu: 1;
+};
+
+static struct mlx5_sf *
+mlx5_sf_lookup_by_index(struct mlx5_sf_table *table, unsigned int port_index)
+{
+ return xa_load(&table->port_indices, port_index);
+}
+
+static struct mlx5_sf *
+mlx5_sf_lookup_by_function_id(struct mlx5_sf_table *table, unsigned int fn_id)
+{
+ unsigned long index;
+ struct mlx5_sf *sf;
+
+ xa_for_each(&table->port_indices, index, sf) {
+ if (sf->hw_fn_id == fn_id)
+ return sf;
+ }
+ return NULL;
+}
+
+static int mlx5_sf_id_insert(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ return xa_insert(&table->port_indices, sf->port_index, sf, GFP_KERNEL);
+}
+
+static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ xa_erase(&table->port_indices, sf->port_index);
+}
+
+static struct mlx5_sf *
+mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw,
+ u32 controller, u32 sfnum, struct netlink_ext_ack *extack)
+{
+ unsigned int dl_port_index;
+ struct mlx5_sf *sf;
+ u16 hw_fn_id;
+ int id_err;
+ int err;
+
+ if (!mlx5_esw_offloads_controller_valid(esw, controller)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid controller number");
+ return ERR_PTR(-EINVAL);
+ }
+
+ id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum);
+ if (id_err < 0) {
+ err = id_err;
+ goto id_err;
+ }
+
+ sf = kzalloc(sizeof(*sf), GFP_KERNEL);
+ if (!sf) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+ sf->id = id_err;
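+ /* Map the SW SF id to its HW function id and derive the devlink port index from it */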
+ hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id);
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id);
+ sf->port_index = dl_port_index;
+ sf->hw_fn_id = hw_fn_id;
+ sf->hw_state = MLX5_VHCA_STATE_ALLOCATED;
+ sf->controller = controller;
+
+ err = mlx5_sf_id_insert(table, sf);
+ if (err)
+ goto insert_err;
+
+ return sf;
+
+insert_err:
+ kfree(sf);
+alloc_err:
+ mlx5_sf_hw_table_sf_free(table->dev, controller, id_err);
+id_err:
+ if (err == -EEXIST)
+ NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum");
+ return ERR_PTR(err);
+}
+
+static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ mlx5_sf_id_erase(table, sf);
+ mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id);
+ trace_mlx5_sf_free(table->dev, sf->port_index, sf->controller, sf->hw_fn_id);
+ kfree(sf);
+}
+
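+/* The table is usable only while its refcount is non-zero; the refcount is set
+ * when the eswitch enters switchdev (offloads) mode and dropped when it leaves.
+ */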
+static struct mlx5_sf_table *mlx5_sf_table_try_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_table *table = dev->priv.sf_table;
+
+ if (!table)
+ return NULL;
+
+ return refcount_inc_not_zero(&table->refcount) ? table : NULL;
+}
+
+static void mlx5_sf_table_put(struct mlx5_sf_table *table)
+{
+ if (refcount_dec_and_test(&table->refcount))
+ complete(&table->disable_complete);
+}
+
+static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state)
+{
+ switch (hw_state) {
+ case MLX5_VHCA_STATE_ACTIVE:
+ case MLX5_VHCA_STATE_IN_USE:
+ return DEVLINK_PORT_FN_STATE_ACTIVE;
+ case MLX5_VHCA_STATE_INVALID:
+ case MLX5_VHCA_STATE_ALLOCATED:
+ case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
+ default:
+ return DEVLINK_PORT_FN_STATE_INACTIVE;
+ }
+}
+
+static enum devlink_port_fn_opstate mlx5_sf_to_devlink_opstate(u8 hw_state)
+{
+ switch (hw_state) {
+ case MLX5_VHCA_STATE_IN_USE:
+ case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
+ return DEVLINK_PORT_FN_OPSTATE_ATTACHED;
+ case MLX5_VHCA_STATE_INVALID:
+ case MLX5_VHCA_STATE_ALLOCATED:
+ case MLX5_VHCA_STATE_ACTIVE:
+ default:
+ return DEVLINK_PORT_FN_OPSTATE_DETACHED;
+ }
+}
+
+static bool mlx5_sf_is_active(const struct mlx5_sf *sf)
+{
+ return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE;
+}
+
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
+ enum devlink_port_fn_state *state,
+ enum devlink_port_fn_opstate *opstate,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
+ struct mlx5_sf_table *table;
+ struct mlx5_sf *sf;
+ int err = 0;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table)
+ return -EOPNOTSUPP;
+
+ sf = mlx5_sf_lookup_by_index(table, dl_port->index);
+ if (!sf) {
+ err = -EOPNOTSUPP;
+ goto sf_err;
+ }
+ mutex_lock(&table->sf_state_lock);
+ *state = mlx5_sf_to_devlink_state(sf->hw_state);
+ *opstate = mlx5_sf_to_devlink_opstate(sf->hw_state);
+ mutex_unlock(&table->sf_state_lock);
+sf_err:
+ mlx5_sf_table_put(table);
+ return err;
+}
+
+static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (mlx5_sf_is_active(sf))
+ return 0;
+ if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) {
+ NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached");
+ return -EBUSY;
+ }
+
+ err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id);
+ if (err)
+ return err;
+
+ sf->hw_state = MLX5_VHCA_STATE_ACTIVE;
+ trace_mlx5_sf_activate(dev, sf->port_index, sf->controller, sf->hw_fn_id);
+ return 0;
+}
+
+static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf)
+{
+ int err;
+
+ if (!mlx5_sf_is_active(sf))
+ return 0;
+
+ err = mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id);
+ if (err)
+ return err;
+
+ sf->hw_state = MLX5_VHCA_STATE_TEARDOWN_REQUEST;
+ trace_mlx5_sf_deactivate(dev, sf->port_index, sf->controller, sf->hw_fn_id);
+ return 0;
+}
+
+static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
+ struct mlx5_sf *sf,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ mutex_lock(&table->sf_state_lock);
+ if (state == mlx5_sf_to_devlink_state(sf->hw_state))
+ goto out;
+ if (state == DEVLINK_PORT_FN_STATE_ACTIVE)
+ err = mlx5_sf_activate(dev, sf, extack);
+ else if (state == DEVLINK_PORT_FN_STATE_INACTIVE)
+ err = mlx5_sf_deactivate(dev, sf);
+ else
+ err = -EINVAL;
+out:
+ mutex_unlock(&table->sf_state_lock);
+ return err;
+}
+
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
+ struct mlx5_sf_table *table;
+ struct mlx5_sf *sf;
+ int err;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port state set is only supported in eswitch switchdev mode or SF ports are disabled.");
+ return -EOPNOTSUPP;
+ }
+ sf = mlx5_sf_lookup_by_index(table, dl_port->index);
+ if (!sf) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = mlx5_sf_state_set(dev, table, sf, state, extack);
+out:
+ mlx5_sf_table_put(table);
+ return err;
+}
+
+static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
+ const struct devlink_port_new_attrs *new_attr,
+ struct netlink_ext_ack *extack,
+ unsigned int *new_port_index)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_sf *sf;
+ int err;
+
+ sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack);
+ if (IS_ERR(sf))
+ return PTR_ERR(sf);
+
+ err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id,
+ new_attr->controller, new_attr->sfnum);
+ if (err)
+ goto esw_err;
+ *new_port_index = sf->port_index;
+ trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum);
+ return 0;
+
+esw_err:
+ mlx5_sf_free(table, sf);
+ return err;
+}
+
+static int
+mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_attrs *new_attr,
+ struct netlink_ext_ack *extack)
+{
+ if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
+ NL_SET_ERR_MSG_MOD(extack, "Driver supports only SF port addition");
+ return -EOPNOTSUPP;
+ }
+ if (new_attr->port_index_valid) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Driver does not support user defined port index assignment");
+ return -EOPNOTSUPP;
+ }
+ if (!new_attr->sfnum_valid) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "User must provide unique sfnum. Driver does not support auto assignment");
+ return -EOPNOTSUPP;
+ }
+ if (new_attr->controller_valid && new_attr->controller &&
+ !mlx5_core_is_ecpf_esw_manager(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported");
+ return -EOPNOTSUPP;
+ }
+ if (new_attr->pfnum != mlx5_get_dev_index(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+int mlx5_devlink_sf_port_new(struct devlink *devlink,
+ const struct devlink_port_new_attrs *new_attr,
+ struct netlink_ext_ack *extack,
+ unsigned int *new_port_index)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_sf_table *table;
+ int err;
+
+ err = mlx5_sf_new_check_attr(dev, new_attr, extack);
+ if (err)
+ return err;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port add is only supported in eswitch switchdev mode or SF ports are disabled.");
+ return -EOPNOTSUPP;
+ }
+ err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index);
+ mlx5_sf_table_put(table);
+ return err;
+}
+
+static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf)
+{
+ if (sf->hw_state == MLX5_VHCA_STATE_ALLOCATED) {
+ mlx5_sf_free(table, sf);
+ } else if (mlx5_sf_is_active(sf)) {
+ /* Even if it is active, it is treated as in_use because, by the
+ * time it is disabled here, it may already be getting used. So it
+ * is safe to always wait for the event to ensure that it is
+ * recycled only after firmware confirms that it has been detached
+ * by the driver.
+ */
+ mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id);
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
+ kfree(sf);
+ } else {
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
+ kfree(sf);
+ }
+}
+
+int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_sf_table *table;
+ struct mlx5_sf *sf;
+ int err = 0;
+
+ table = mlx5_sf_table_try_get(dev);
+ if (!table) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port del is only supported in eswitch switchdev mode or SF ports are disabled.");
+ return -EOPNOTSUPP;
+ }
+ sf = mlx5_sf_lookup_by_index(table, port_index);
+ if (!sf) {
+ err = -ENODEV;
+ goto sf_err;
+ }
+
+ mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id);
+ mlx5_sf_id_erase(table, sf);
+
+ mutex_lock(&table->sf_state_lock);
+ mlx5_sf_dealloc(table, sf);
+ mutex_unlock(&table->sf_state_lock);
+sf_err:
+ mlx5_sf_table_put(table);
+ return err;
+}
+
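+/* Only transitions that reflect driver attach/detach or teardown completion
+ * update the cached hw_state.
+ */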
+static bool mlx5_sf_state_update_check(const struct mlx5_sf *sf, u8 new_state)
+{
+ if (sf->hw_state == MLX5_VHCA_STATE_ACTIVE && new_state == MLX5_VHCA_STATE_IN_USE)
+ return true;
+
+ if (sf->hw_state == MLX5_VHCA_STATE_IN_USE && new_state == MLX5_VHCA_STATE_ACTIVE)
+ return true;
+
+ if (sf->hw_state == MLX5_VHCA_STATE_TEARDOWN_REQUEST &&
+ new_state == MLX5_VHCA_STATE_ALLOCATED)
+ return true;
+
+ return false;
+}
+
+static int mlx5_sf_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data)
+{
+ struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, vhca_nb);
+ const struct mlx5_vhca_state_event *event = data;
+ bool update = false;
+ struct mlx5_sf *sf;
+
+ table = mlx5_sf_table_try_get(table->dev);
+ if (!table)
+ return 0;
+
+ mutex_lock(&table->sf_state_lock);
+ sf = mlx5_sf_lookup_by_function_id(table, event->function_id);
+ if (!sf)
+ goto sf_err;
+
+ /* When a driver is attached to or detached from a function, an
+ * event notifies of such a state change.
+ */
+ update = mlx5_sf_state_update_check(sf, event->new_vhca_state);
+ if (update)
+ sf->hw_state = event->new_vhca_state;
+ trace_mlx5_sf_update_state(table->dev, sf->port_index, sf->controller,
+ sf->hw_fn_id, sf->hw_state);
+sf_err:
+ mutex_unlock(&table->sf_state_lock);
+ mlx5_sf_table_put(table);
+ return 0;
+}
+
+static void mlx5_sf_table_enable(struct mlx5_sf_table *table)
+{
+ init_completion(&table->disable_complete);
+ refcount_set(&table->refcount, 1);
+}
+
+static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table)
+{
+ struct mlx5_eswitch *esw = table->dev->priv.eswitch;
+ unsigned long index;
+ struct mlx5_sf *sf;
+
+ /* At this point, no new user commands can start and no vhca event can
+ * arrive. It is safe to destroy all user-created SFs.
+ */
+ xa_for_each(&table->port_indices, index, sf) {
+ mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id);
+ mlx5_sf_id_erase(table, sf);
+ mlx5_sf_dealloc(table, sf);
+ }
+}
+
+static void mlx5_sf_table_disable(struct mlx5_sf_table *table)
+{
+ if (!refcount_read(&table->refcount))
+ return;
+
+ /* Balances with refcount_set; drop the reference so that a new user
+ * command cannot start and a new vhca event handler cannot run.
+ */
+ mlx5_sf_table_put(table);
+ wait_for_completion(&table->disable_complete);
+
+ mlx5_sf_deactivate_all(table);
+}
+
+static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, esw_nb);
+ const struct mlx5_esw_event_info *mode = data;
+
+ switch (mode->new_mode) {
+ case MLX5_ESWITCH_OFFLOADS:
+ mlx5_sf_table_enable(table);
+ break;
+ case MLX5_ESWITCH_LEGACY:
+ mlx5_sf_table_disable(table);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev)
+{
+ return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) &&
+ mlx5_sf_hw_table_supported(dev);
+}
+
+int mlx5_sf_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_table *table;
+ int err;
+
+ if (!mlx5_sf_table_supported(dev) || !mlx5_vhca_event_supported(dev))
+ return 0;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ mutex_init(&table->sf_state_lock);
+ table->dev = dev;
+ xa_init(&table->port_indices);
+ dev->priv.sf_table = table;
+ refcount_set(&table->refcount, 0);
+ table->esw_nb.notifier_call = mlx5_sf_esw_event;
+ err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb);
+ if (err)
+ goto reg_err;
+
+ table->vhca_nb.notifier_call = mlx5_sf_vhca_event;
+ err = mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb);
+ if (err)
+ goto vhca_err;
+
+ return 0;
+
+vhca_err:
+ mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb);
+reg_err:
+ mutex_destroy(&table->sf_state_lock);
+ kfree(table);
+ dev->priv.sf_table = NULL;
+ return err;
+}
+
+void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_table *table = dev->priv.sf_table;
+
+ if (!table)
+ return;
+
+ mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb);
+ mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb);
+ WARN_ON(refcount_read(&table->refcount));
+ mutex_destroy(&table->sf_state_lock);
+ WARN_ON(!xa_empty(&table->port_indices));
+ kfree(table);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h
new file mode 100644
index 000000000..8bf1cd909
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_SF_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_SF_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/mlx5/driver.h>
+#include "sf/vhca_event.h"
+
+TRACE_EVENT(mlx5_sf_add,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ unsigned int port_index,
+ u32 controller,
+ u16 hw_fn_id,
+ u32 sfnum),
+ TP_ARGS(dev, port_index, controller, hw_fn_id, sfnum),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ __entry->sfnum = sfnum;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x sfnum=%u\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id, __entry->sfnum)
+);
+
+TRACE_EVENT(mlx5_sf_free,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ unsigned int port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id)
+);
+
+TRACE_EVENT(mlx5_sf_hwc_alloc,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 controller,
+ u16 hw_fn_id,
+ u32 sfnum),
+ TP_ARGS(dev, controller, hw_fn_id, sfnum),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ __entry->sfnum = sfnum;
+ ),
+ TP_printk("(%s) controller=%u hw_id=0x%x sfnum=%u\n",
+ __get_str(devname), __entry->controller, __entry->hw_fn_id,
+ __entry->sfnum)
+);
+
+TRACE_EVENT(mlx5_sf_hwc_free,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u16 hw_fn_id),
+ TP_ARGS(dev, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u16, hw_fn_id)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id)
+);
+
+TRACE_EVENT(mlx5_sf_hwc_deferred_free,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u16 hw_fn_id),
+ TP_ARGS(dev, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u16, hw_fn_id)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id)
+);
+
+DECLARE_EVENT_CLASS(mlx5_sf_state_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id)
+);
+
+DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_activate,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id)
+ );
+
+DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_deactivate,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ u32 port_index,
+ u32 controller,
+ u16 hw_fn_id),
+ TP_ARGS(dev, port_index, controller, hw_fn_id)
+ );
+
+TRACE_EVENT(mlx5_sf_update_state,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ unsigned int port_index,
+ u32 controller,
+ u16 hw_fn_id,
+ u8 state),
+ TP_ARGS(dev, port_index, controller, hw_fn_id, state),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(unsigned int, port_index)
+ __field(u32, controller)
+ __field(u16, hw_fn_id)
+ __field(u8, state)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->port_index = port_index;
+ __entry->controller = controller;
+ __entry->hw_fn_id = hw_fn_id;
+ __entry->state = state;
+ ),
+ TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x state=%u\n",
+ __get_str(devname), __entry->port_index, __entry->controller,
+ __entry->hw_fn_id, __entry->state)
+);
+
+#endif /* _MLX5_SF_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH sf/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE sf_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h
new file mode 100644
index 000000000..fd814a190
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_SF_VHCA_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_SF_VHCA_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/mlx5/driver.h>
+#include "sf/vhca_event.h"
+
+TRACE_EVENT(mlx5_sf_vhca_event,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_vhca_state_event *event),
+ TP_ARGS(dev, event),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(u16, hw_fn_id)
+ __field(u32, sfnum)
+ __field(u8, vhca_state)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->hw_fn_id = event->function_id;
+ __entry->sfnum = event->sw_function_id;
+ __entry->vhca_state = event->new_vhca_state;
+ ),
+ TP_printk("(%s) hw_id=0x%x sfnum=%u vhca_state=%d\n",
+ __get_str(devname), __entry->hw_fn_id,
+ __entry->sfnum, __entry->vhca_state)
+);
+
+#endif /* _MLX5_SF_VHCA_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH sf/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE vhca_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
new file mode 100644
index 000000000..17aa34898
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+#include <linux/mlx5/driver.h>
+#include "vhca_event.h"
+#include "priv.h"
+#include "sf.h"
+#include "mlx5_ifc_vhca_event.h"
+#include "ecpf.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "diag/sf_tracepoint.h"
+
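+/* One hardware SF slot: usr_sfnum is the user supplied SF number; a slot
+ * marked pending_delete is recycled only after the VHCA event confirms that
+ * the SF was detached.
+ */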
+struct mlx5_sf_hw {
+ u32 usr_sfnum;
+ u8 allocated: 1;
+ u8 pending_delete: 1;
+};
+
+struct mlx5_sf_hwc_table {
+ struct mlx5_sf_hw *sfs;
+ int max_fn;
+ u16 start_fn_id;
+};
+
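+/* Two ranges of hardware function ids are maintained: one for SFs owned by
+ * the local controller (controller number 0) and one for SFs of an external
+ * controller; the controller number selects the range (see
+ * mlx5_sf_controller_to_hwc()).
+ */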
+enum mlx5_sf_hwc_index {
+ MLX5_SF_HWC_LOCAL,
+ MLX5_SF_HWC_EXTERNAL,
+ MLX5_SF_HWC_MAX,
+};
+
+struct mlx5_sf_hw_table {
+ struct mlx5_core_dev *dev;
+ struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */
+ struct notifier_block vhca_nb;
+ struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX];
+};
+
+static struct mlx5_sf_hwc_table *
+mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller)
+{
+ int idx = !!controller;
+
+ return &dev->priv.sf_hw_table->hwc[idx];
+}
+
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id)
+{
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
+ return hwc->start_fn_id + sw_id;
+}
+
+static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id)
+{
+ return hw_id - hwc->start_fn_id;
+}
+
+static struct mlx5_sf_hwc_table *
+mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(table->hwc); i++) {
+ if (table->hwc[i].max_fn &&
+ fn_id >= table->hwc[i].start_fn_id &&
+ fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn))
+ return &table->hwc[i];
+ }
+ return NULL;
+}
+
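+/* Find a free slot in the controller's table while verifying that the
+ * requested SF number is not already in use; returns the slot index,
+ * -EEXIST on a duplicate sfnum or -ENOSPC when the table is full.
+ */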
+static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller,
+ u32 usr_sfnum)
+{
+ struct mlx5_sf_hwc_table *hwc;
+ int free_idx = -1;
+ int i;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ if (!hwc->sfs)
+ return -ENOSPC;
+
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (!hwc->sfs[i].allocated && free_idx == -1) {
+ free_idx = i;
+ continue;
+ }
+
+ if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum)
+ return -EEXIST;
+ }
+
+ if (free_idx == -1)
+ return -ENOSPC;
+
+ hwc->sfs[free_idx].usr_sfnum = usr_sfnum;
+ hwc->sfs[free_idx].allocated = true;
+ return free_idx;
+}
+
+static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id)
+{
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ hwc->sfs[id].allocated = false;
+ hwc->sfs[id].pending_delete = false;
+}
+
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u16 hw_fn_id;
+ int sw_id;
+ int err;
+
+ if (!table)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&table->table_lock);
+ sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum);
+ if (sw_id < 0) {
+ err = sw_id;
+ goto exist_err;
+ }
+
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id);
+ err = mlx5_cmd_alloc_sf(dev, hw_fn_id);
+ if (err)
+ goto err;
+
+ err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum);
+ if (err)
+ goto vhca_err;
+
+ if (controller) {
+ /* If this SF is for an external controller, the SF manager
+ * needs to arm the firmware to receive the events.
+ */
+ err = mlx5_vhca_event_arm(dev, hw_fn_id);
+ if (err)
+ goto vhca_err;
+ }
+
+ trace_mlx5_sf_hwc_alloc(dev, controller, hw_fn_id, usr_sfnum);
+ mutex_unlock(&table->table_lock);
+ return sw_id;
+
+vhca_err:
+ mlx5_cmd_dealloc_sf(dev, hw_fn_id);
+err:
+ mlx5_sf_hw_table_id_free(table, controller, sw_id);
+exist_err:
+ mutex_unlock(&table->table_lock);
+ return err;
+}
+
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u16 hw_fn_id;
+
+ mutex_lock(&table->table_lock);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
+ mlx5_cmd_dealloc_sf(dev, hw_fn_id);
+ mlx5_sf_hw_table_id_free(table, controller, id);
+ mutex_unlock(&table->table_lock);
+}
+
+static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc, int idx)
+{
+ mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx);
+ hwc->sfs[idx].allocated = false;
+ hwc->sfs[idx].pending_delete = false;
+ trace_mlx5_sf_hwc_free(dev, hwc->start_fn_id + idx);
+}
+
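+/* Free an SF whose VHCA may still be in use: if the VHCA is merely ALLOCATED
+ * it is deallocated right away, otherwise the slot is marked pending_delete
+ * and recycled later from the VHCA state event handler.
+ */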
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ struct mlx5_sf_hwc_table *hwc;
+ u16 hw_fn_id;
+ u8 state;
+ int err;
+
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
+ mutex_lock(&table->table_lock);
+ err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out));
+ if (err)
+ goto err;
+ state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
+ if (state == MLX5_VHCA_STATE_ALLOCATED) {
+ mlx5_cmd_dealloc_sf(dev, hw_fn_id);
+ hwc->sfs[id].allocated = false;
+ } else {
+ hwc->sfs[id].pending_delete = true;
+ trace_mlx5_sf_hwc_deferred_free(dev, hw_fn_id);
+ }
+err:
+ mutex_unlock(&table->table_lock);
+}
+
+static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc)
+{
+ int i;
+
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (hwc->sfs[i].allocated)
+ mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i);
+ }
+}
+
+static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table)
+{
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]);
+}
+
+static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id)
+{
+ struct mlx5_sf_hw *sfs;
+
+ if (!max_fn)
+ return 0;
+
+ sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL);
+ if (!sfs)
+ return -ENOMEM;
+
+ hwc->sfs = sfs;
+ hwc->max_fn = max_fn;
+ hwc->start_fn_id = base_id;
+ return 0;
+}
+
+static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc)
+{
+ kfree(hwc->sfs);
+}
+
+int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table;
+ u16 max_ext_fn = 0;
+ u16 ext_base_id = 0;
+ u16 max_fn = 0;
+ u16 base_id;
+ int err;
+
+ if (!mlx5_vhca_event_supported(dev))
+ return 0;
+
+ if (mlx5_sf_supported(dev))
+ max_fn = mlx5_sf_max_functions(dev);
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id);
+ if (err)
+ return err;
+
+ if (!max_fn && !max_ext_fn)
+ return 0;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ mutex_init(&table->table_lock);
+ table->dev = dev;
+ dev->priv.sf_hw_table = table;
+
+ base_id = mlx5_sf_start_function_id(dev);
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], max_fn, base_id);
+ if (err)
+ goto table_err;
+
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL],
+ max_ext_fn, ext_base_id);
+ if (err)
+ goto ext_err;
+
+ mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn);
+ return 0;
+
+ext_err:
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
+table_err:
+ mutex_destroy(&table->table_lock);
+ kfree(table);
+ return err;
+}
+
+void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+ if (!table)
+ return;
+
+ mutex_destroy(&table->table_lock);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
+ kfree(table);
+}
+
+static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data)
+{
+ struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb);
+ const struct mlx5_vhca_state_event *event = data;
+ struct mlx5_sf_hwc_table *hwc;
+ struct mlx5_sf_hw *sf_hw;
+ u16 sw_id;
+
+ if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED)
+ return 0;
+
+ hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id);
+ if (!hwc)
+ return 0;
+
+ sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id);
+ sf_hw = &hwc->sfs[sw_id];
+
+ mutex_lock(&table->table_lock);
+ /* The SF driver has notified through firmware that the SF is finally
+ * detached. Hence recycle the SF hardware id for reuse.
+ */
+ if (sf_hw->allocated && sf_hw->pending_delete)
+ mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id);
+ mutex_unlock(&table->table_lock);
+ return 0;
+}
+
+int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+ if (!table)
+ return 0;
+
+ table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event;
+ return mlx5_vhca_event_notifier_register(dev, &table->vhca_nb);
+}
+
+void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+ if (!table)
+ return;
+
+ mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb);
+ /* Dealloc SFs whose firmware event has been missed. */
+ mlx5_sf_hw_table_dealloc_all(table);
+}
+
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev)
+{
+ return !!dev->priv.sf_hw_table;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h
new file mode 100644
index 000000000..4fc870140
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_IFC_VHCA_EVENT_H__
+#define __MLX5_IFC_VHCA_EVENT_H__
+
+enum mlx5_ifc_vhca_state {
+ MLX5_VHCA_STATE_INVALID = 0x0,
+ MLX5_VHCA_STATE_ALLOCATED = 0x1,
+ MLX5_VHCA_STATE_ACTIVE = 0x2,
+ MLX5_VHCA_STATE_IN_USE = 0x3,
+ MLX5_VHCA_STATE_TEARDOWN_REQUEST = 0x4,
+};
+
+struct mlx5_ifc_vhca_state_context_bits {
+ u8 arm_change_event[0x1];
+ u8 reserved_at_1[0xb];
+ u8 vhca_state[0x4];
+ u8 reserved_at_10[0x10];
+
+ u8 sw_function_id[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_query_vhca_state_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_vhca_state_context_bits vhca_state_context;
+};
+
+struct mlx5_ifc_query_vhca_state_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 embedded_cpu_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_vhca_state_field_select_bits {
+ u8 reserved_at_0[0x1e];
+ u8 sw_function_id[0x1];
+ u8 arm_change_event[0x1];
+};
+
+struct mlx5_ifc_modify_vhca_state_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_vhca_state_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 embedded_cpu_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ struct mlx5_ifc_vhca_state_field_select_bits vhca_state_field_select;
+
+ struct mlx5_ifc_vhca_state_context_bits vhca_state_context;
+};
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
new file mode 100644
index 000000000..7114f3fc3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_SF_PRIV_H__
+#define __MLX5_SF_PRIV_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id);
+int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id);
+
+int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id);
+
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum);
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
new file mode 100644
index 000000000..3a480e06e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_SF_H__
+#define __MLX5_SF_H__
+
+#include <linux/mlx5/driver.h>
+#include "lib/sf.h"
+
+#ifdef CONFIG_MLX5_SF_MANAGER
+
+int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev);
+void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev);
+void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_sf_table_init(struct mlx5_core_dev *dev);
+void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_devlink_sf_port_new(struct devlink *devlink,
+ const struct devlink_port_new_attrs *add_attr,
+ struct netlink_ext_ack *extack,
+ unsigned int *new_port_index);
+int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
+ enum devlink_port_fn_state *state,
+ enum devlink_port_fn_opstate *opstate,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack);
+#else
+
+static inline int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
+{
+}
+
+static inline int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
+{
+}
+
+static inline int mlx5_sf_table_init(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c
new file mode 100644
index 000000000..d908fba96
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_ifc_vhca_event.h"
+#include "mlx5_core.h"
+#include "vhca_event.h"
+#include "ecpf.h"
+#define CREATE_TRACE_POINTS
+#include "diag/vhca_tracepoint.h"
+
+struct mlx5_vhca_state_notifier {
+ struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
+ struct blocking_notifier_head n_head;
+};
+
+struct mlx5_vhca_event_work {
+ struct work_struct work;
+ struct mlx5_vhca_state_notifier *notifier;
+ struct mlx5_vhca_state_event event;
+};
+
+int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {};
+
+ MLX5_SET(query_vhca_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_STATE);
+ MLX5_SET(query_vhca_state_in, in, function_id, function_id);
+ MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, 0);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+static int mlx5_cmd_modify_vhca_state(struct mlx5_core_dev *dev, u16 function_id,
+ u32 *in, u32 inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {};
+
+ MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE);
+ MLX5_SET(modify_vhca_state_in, in, function_id, function_id);
+ MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0);
+
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {};
+
+ MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE);
+ MLX5_SET(modify_vhca_state_in, in, function_id, function_id);
+ MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0);
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.sw_function_id, 1);
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_context.sw_function_id, sw_fn_id);
+
+ return mlx5_cmd_exec_inout(dev, modify_vhca_state, in, out);
+}
+
+int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {};
+
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_context.arm_change_event, 1);
+ MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.arm_change_event, 1);
+
+ return mlx5_cmd_modify_vhca_state(dev, function_id, in, sizeof(in));
+}
+
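+/* Complete a VHCA state change event: query the new state and sw function id
+ * from firmware, re-arm the change event and notify all registered listeners
+ * through the blocking notifier chain.
+ */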
+static void
+mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event *event)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ int err;
+
+ err = mlx5_cmd_query_vhca_state(dev, event->function_id, out, sizeof(out));
+ if (err)
+ return;
+
+ event->sw_function_id = MLX5_GET(query_vhca_state_out, out,
+ vhca_state_context.sw_function_id);
+ event->new_vhca_state = MLX5_GET(query_vhca_state_out, out,
+ vhca_state_context.vhca_state);
+
+ mlx5_vhca_event_arm(dev, event->function_id);
+ trace_mlx5_sf_vhca_event(dev, event);
+
+ blocking_notifier_call_chain(&dev->priv.vhca_state_notifier->n_head, 0, event);
+}
+
+static void mlx5_vhca_state_work_handler(struct work_struct *_work)
+{
+ struct mlx5_vhca_event_work *work = container_of(_work, struct mlx5_vhca_event_work, work);
+ struct mlx5_vhca_state_notifier *notifier = work->notifier;
+ struct mlx5_core_dev *dev = notifier->dev;
+
+ mlx5_vhca_event_notify(dev, &work->event);
+ kfree(work);
+}
+
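+/* EQ notifier callback; it runs in atomic context, so it only records the
+ * function id from the EQE and defers the actual handling to a work item on
+ * the events workqueue.
+ */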
+static int
+mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_vhca_state_notifier *notifier =
+ mlx5_nb_cof(nb, struct mlx5_vhca_state_notifier, nb);
+ struct mlx5_vhca_event_work *work;
+ struct mlx5_eqe *eqe = data;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
+ INIT_WORK(&work->work, &mlx5_vhca_state_work_handler);
+ work->notifier = notifier;
+ work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id);
+ mlx5_events_work_enqueue(notifier->dev, &work->work);
+ return NOTIFY_OK;
+}
+
+void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap)
+{
+ if (!mlx5_vhca_event_supported(dev))
+ return;
+
+ MLX5_SET(cmd_hca_cap, set_hca_cap, vhca_state, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_allocated, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_active, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_in_use, 1);
+ MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_teardown_request, 1);
+}
+
+int mlx5_vhca_event_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vhca_state_notifier *notifier;
+
+ if (!mlx5_vhca_event_supported(dev))
+ return 0;
+
+ notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
+ if (!notifier)
+ return -ENOMEM;
+
+ dev->priv.vhca_state_notifier = notifier;
+ notifier->dev = dev;
+ BLOCKING_INIT_NOTIFIER_HEAD(&notifier->n_head);
+ MLX5_NB_INIT(&notifier->nb, mlx5_vhca_state_change_notifier, VHCA_STATE_CHANGE);
+ return 0;
+}
+
+void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_vhca_event_supported(dev))
+ return;
+
+ kfree(dev->priv.vhca_state_notifier);
+ dev->priv.vhca_state_notifier = NULL;
+}
+
+void mlx5_vhca_event_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vhca_state_notifier *notifier;
+
+ if (!dev->priv.vhca_state_notifier)
+ return;
+
+ notifier = dev->priv.vhca_state_notifier;
+ mlx5_eq_notifier_register(dev, &notifier->nb);
+}
+
+void mlx5_vhca_event_stop(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vhca_state_notifier *notifier;
+
+ if (!dev->priv.vhca_state_notifier)
+ return;
+
+ notifier = dev->priv.vhca_state_notifier;
+ mlx5_eq_notifier_unregister(dev, &notifier->nb);
+}
+
+int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ if (!dev->priv.vhca_state_notifier)
+ return -EOPNOTSUPP;
+ return blocking_notifier_chain_register(&dev->priv.vhca_state_notifier->n_head, nb);
+}
+
+void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&dev->priv.vhca_state_notifier->n_head, nb);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h
new file mode 100644
index 000000000..013cdfe90
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_VHCA_EVENT_H__
+#define __MLX5_VHCA_EVENT_H__
+
+#ifdef CONFIG_MLX5_SF
+
+struct mlx5_vhca_state_event {
+ u16 function_id;
+ u16 sw_function_id;
+ u8 new_vhca_state;
+};
+
+static inline bool mlx5_vhca_event_supported(const struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN_MAX(dev, vhca_state);
+}
+
+void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap);
+int mlx5_vhca_event_init(struct mlx5_core_dev *dev);
+void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev);
+void mlx5_vhca_event_start(struct mlx5_core_dev *dev);
+void mlx5_vhca_event_stop(struct mlx5_core_dev *dev);
+int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
+void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb);
+int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id);
+int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id);
+int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id,
+ u32 *out, u32 outlen);
+#else
+
+static inline void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap)
+{
+}
+
+static inline int mlx5_vhca_event_init(struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
+static inline void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev)
+{
+}
+
+static inline void mlx5_vhca_event_start(struct mlx5_core_dev *dev)
+{
+}
+
+static inline void mlx5_vhca_event_stop(struct mlx5_core_dev *dev)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
new file mode 100644
index 000000000..5f2195e65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "eswitch.h"
+
+static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct mlx5_hca_vport_context *in;
+ int err = 0;
+
+ /* Restore SR-IOV GUID and policy settings */
+ if (sriov->vfs_ctx[vf].node_guid ||
+ sriov->vfs_ctx[vf].port_guid ||
+ sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) {
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->node_guid = sriov->vfs_ctx[vf].node_guid;
+ in->port_guid = sriov->vfs_ctx[vf].port_guid;
+ in->policy = sriov->vfs_ctx[vf].policy;
+ in->field_select =
+ !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID |
+ !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID |
+ !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY;
+
+ err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in);
+ if (err)
+ mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf);
+
+ kfree(in);
+ }
+
+ return err;
+}
+
+static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err, vf, num_msix_count;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ goto enable_vfs_hca;
+
+ err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to enable eswitch SRIOV (%d)\n", err);
+ return err;
+ }
+
+enable_vfs_hca:
+ num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
+ for (vf = 0; vf < num_vfs; vf++) {
+ /* Notify the VF before it is enabled so that it can perform
+ * any required setup.
+ */
+ blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+ MLX5_PF_NOTIFY_ENABLE_VF, dev);
+ err = mlx5_core_enable_hca(dev, vf + 1);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
+ continue;
+ }
+
+ err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to set MSI-X vector counts VF %d, err %d\n",
+ vf, err);
+ continue;
+ }
+
+ sriov->vfs_ctx[vf].enabled = 1;
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
+ err = sriov_restore_guids(dev, vf);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to restore VF %d settings, err %d\n",
+ vf, err);
+ continue;
+ }
+ }
+ mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
+ }
+
+ return 0;
+}
+
+static void
+mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+ int vf;
+
+ for (vf = num_vfs - 1; vf >= 0; vf--) {
+ if (!sriov->vfs_ctx[vf].enabled)
+ continue;
+ /* Notify the VF before it is disabled so that it can clean
+ * up its resources.
+ */
+ blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+ MLX5_PF_NOTIFY_DISABLE_VF, dev);
+ err = mlx5_core_disable_hca(dev, vf + 1);
+ if (err) {
+ mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
+ continue;
+ }
+ sriov->vfs_ctx[vf].enabled = 0;
+ }
+
+ mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf);
+
+ /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */
+ if (mlx5_core_is_ecpf(dev))
+ return;
+
+ if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF]))
+ mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
+}
+
+static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
+ int err;
+
+ devl_lock(devlink);
+ err = mlx5_device_enable_sriov(dev, num_vfs);
+ devl_unlock(devlink);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
+ return err;
+ }
+
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err) {
+ mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
+ mlx5_device_disable_sriov(dev, num_vfs, true);
+ }
+ return err;
+}
+
+void mlx5_sriov_disable(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
+ int num_vfs = pci_num_vf(dev->pdev);
+
+ pci_disable_sriov(pdev);
+ devl_lock(devlink);
+ mlx5_device_disable_sriov(dev, num_vfs, true);
+ devl_unlock(devlink);
+}
+
+int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err = 0;
+
+ mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
+
+ if (num_vfs)
+ err = mlx5_sriov_enable(pdev, num_vfs);
+ else
+ mlx5_sriov_disable(pdev);
+
+ if (!err)
+ sriov->num_vfs = num_vfs;
+ return err ? err : num_vfs;
+}
+
+int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
+{
+ struct pci_dev *pf = pci_physfn(vf);
+ struct mlx5_core_sriov *sriov;
+ struct mlx5_core_dev *dev;
+ int num_vf_msix, id;
+
+ dev = pci_get_drvdata(pf);
+ num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+ if (!num_vf_msix)
+ return -EOPNOTSUPP;
+
+ if (!msix_vec_count)
+ msix_vec_count =
+ mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));
+
+ sriov = &dev->priv.sriov;
+ id = pci_iov_vf_id(vf);
+ if (id < 0 || !sriov->vfs_ctx[id].enabled)
+ return -EINVAL;
+
+ return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
+}
+
+int mlx5_sriov_attach(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev))
+ return 0;
+
+ /* If SR-IOV VFs exist at the PCI level, enable them at the device level */
+ return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev));
+}
+
+void mlx5_sriov_detach(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false);
+}
+
+static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+{
+ u16 host_total_vfs;
+ const u32 *out;
+
+ if (mlx5_core_is_ecpf_esw_manager(dev)) {
+ out = mlx5_esw_query_functions(dev);
+
+ /* Old FW doesn't support getting total_vfs from esw func
+ * but supports getting it from pci_sriov.
+ */
+ if (IS_ERR(out))
+ goto done;
+ host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_total_vfs);
+ kvfree(out);
+ return host_total_vfs;
+ }
+
+done:
+ return pci_sriov_get_totalvfs(dev->pdev);
+}
+
+int mlx5_sriov_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct pci_dev *pdev = dev->pdev;
+ int total_vfs, i;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ total_vfs = pci_sriov_get_totalvfs(pdev);
+ sriov->max_vfs = mlx5_get_max_vfs(dev);
+ sriov->num_vfs = pci_num_vf(pdev);
+ sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+ if (!sriov->vfs_ctx)
+ return -ENOMEM;
+
+ for (i = 0; i < total_vfs; i++)
+ BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier);
+
+ return 0;
+}
+
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ kfree(sriov->vfs_ctx);
+}
+
+/**
+ * mlx5_sriov_blocking_notifier_unregister - Unregister a VF from
+ * a notification block chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be unregistered.
+ */
+void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
+ int vf_id,
+ struct notifier_block *nb)
+{
+ struct mlx5_vf_context *vfs_ctx;
+ struct mlx5_core_sriov *sriov;
+
+ sriov = &mdev->priv.sriov;
+ if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs))
+ return;
+
+ vfs_ctx = &sriov->vfs_ctx[vf_id];
+ blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister);
+
+/**
+ * mlx5_sriov_blocking_notifier_register - Register a VF notification
+ * block chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be called upon the VF events.
+ *
+ * Returns 0 on success or an error code.
+ */
+int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
+ int vf_id,
+ struct notifier_block *nb)
+{
+ struct mlx5_vf_context *vfs_ctx;
+ struct mlx5_core_sriov *sriov;
+
+ sriov = &mdev->priv.sriov;
+ if (vf_id < 0 || vf_id >= sriov->num_vfs)
+ return -EINVAL;
+
+ vfs_ctx = &sriov->vfs_ctx[vf_id];
+ return blocking_notifier_chain_register(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile
new file mode 100644
index 000000000..c78512eed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
new file mode 100644
index 000000000..bf7517725
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -0,0 +1,2003 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+#include "dr_ste.h"
+
+enum dr_action_domain {
+ DR_ACTION_DOMAIN_NIC_INGRESS,
+ DR_ACTION_DOMAIN_NIC_EGRESS,
+ DR_ACTION_DOMAIN_FDB_INGRESS,
+ DR_ACTION_DOMAIN_FDB_EGRESS,
+ DR_ACTION_DOMAIN_MAX,
+};
+
+enum dr_action_valid_state {
+ DR_ACTION_STATE_ERR,
+ DR_ACTION_STATE_NO_ACTION,
+ DR_ACTION_STATE_ENCAP,
+ DR_ACTION_STATE_DECAP,
+ DR_ACTION_STATE_MODIFY_HDR,
+ DR_ACTION_STATE_POP_VLAN,
+ DR_ACTION_STATE_PUSH_VLAN,
+ DR_ACTION_STATE_NON_TERM,
+ DR_ACTION_STATE_TERM,
+ DR_ACTION_STATE_ASO,
+ DR_ACTION_STATE_MAX,
+};
+
+static const char * const action_type_to_str[] = {
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = "DR_ACTION_TYP_TNL_L2_TO_L2",
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = "DR_ACTION_TYP_L2_TO_TNL_L2",
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = "DR_ACTION_TYP_TNL_L3_TO_L2",
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = "DR_ACTION_TYP_L2_TO_TNL_L3",
+ [DR_ACTION_TYP_DROP] = "DR_ACTION_TYP_DROP",
+ [DR_ACTION_TYP_QP] = "DR_ACTION_TYP_QP",
+ [DR_ACTION_TYP_FT] = "DR_ACTION_TYP_FT",
+ [DR_ACTION_TYP_CTR] = "DR_ACTION_TYP_CTR",
+ [DR_ACTION_TYP_TAG] = "DR_ACTION_TYP_TAG",
+ [DR_ACTION_TYP_MODIFY_HDR] = "DR_ACTION_TYP_MODIFY_HDR",
+ [DR_ACTION_TYP_VPORT] = "DR_ACTION_TYP_VPORT",
+ [DR_ACTION_TYP_POP_VLAN] = "DR_ACTION_TYP_POP_VLAN",
+ [DR_ACTION_TYP_PUSH_VLAN] = "DR_ACTION_TYP_PUSH_VLAN",
+ [DR_ACTION_TYP_SAMPLER] = "DR_ACTION_TYP_SAMPLER",
+ [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR",
+ [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR",
+ [DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER",
+ [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN",
+};
+
+static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id)
+{
+ if (action_id > DR_ACTION_TYP_MAX)
+ action_id = DR_ACTION_TYP_MAX;
+ return action_type_to_str[action_id];
+}
+
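+/* State machine of the allowed action ordering, per steering domain:
+ * next_action_state[domain][current state][next action type] yields the new
+ * state; unlisted combinations default to DR_ACTION_STATE_ERR (0) and are
+ * rejected.
+ */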
+static const enum dr_action_valid_state
+next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = {
+ [DR_ACTION_DOMAIN_NIC_INGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_DECAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ENCAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_PUSH_VLAN] = {
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+ [DR_ACTION_DOMAIN_NIC_EGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_DECAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ENCAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_PUSH_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+ [DR_ACTION_DOMAIN_FDB_INGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_DECAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ENCAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_PUSH_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+ [DR_ACTION_DOMAIN_FDB_EGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_DECAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ENCAP] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_POP_VLAN] = {
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_PUSH_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_ASO] = {
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+};
+
+static int
+dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type,
+ enum mlx5dr_action_type *action_type)
+{
+ switch (reformat_type) {
+ case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2:
+ *action_type = DR_ACTION_TYP_TNL_L2_TO_L2;
+ break;
+ case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2:
+ *action_type = DR_ACTION_TYP_L2_TO_TNL_L2;
+ break;
+ case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2:
+ *action_type = DR_ACTION_TYP_TNL_L3_TO_L2;
+ break;
+ case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3:
+ *action_type = DR_ACTION_TYP_L2_TO_TNL_L3;
+ break;
+ case DR_ACTION_REFORMAT_TYP_INSERT_HDR:
+ *action_type = DR_ACTION_TYP_INSERT_HDR;
+ break;
+ case DR_ACTION_REFORMAT_TYP_REMOVE_HDR:
+ *action_type = DR_ACTION_TYP_REMOVE_HDR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Apply the actions on the rule STE array starting from the last_ste.
+ * Actions might require more than one STE; new_num_stes returns the
+ * new size of the STE array once the rule's actions have been applied.
+ */
+static void dr_actions_apply(struct mlx5dr_domain *dmn,
+ enum mlx5dr_domain_nic_type nic_type,
+ u8 *action_type_set,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *new_num_stes)
+{
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
+ u32 added_stes = 0;
+
+ if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
+ mlx5dr_ste_set_actions_rx(ste_ctx, dmn, action_type_set,
+ last_ste, attr, &added_stes);
+ else
+ mlx5dr_ste_set_actions_tx(ste_ctx, dmn, action_type_set,
+ last_ste, attr, &added_stes);
+
+ *new_num_stes += added_stes;
+}
+
+static enum dr_action_domain
+dr_action_get_action_domain(enum mlx5dr_domain_type domain,
+ enum mlx5dr_domain_nic_type nic_type)
+{
+ switch (domain) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ return DR_ACTION_DOMAIN_NIC_INGRESS;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ return DR_ACTION_DOMAIN_NIC_EGRESS;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
+ return DR_ACTION_DOMAIN_FDB_INGRESS;
+ return DR_ACTION_DOMAIN_FDB_EGRESS;
+ default:
+ WARN_ON(true);
+ return DR_ACTION_DOMAIN_MAX;
+ }
+}
+
+static
+int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain,
+ u32 action_type,
+ u32 *state)
+{
+ u32 cur_state = *state;
+
+ /* Check action state machine is valid */
+ *state = next_action_state[action_domain][cur_state][action_type];
+
+ if (*state == DR_ACTION_STATE_ERR)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
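
The validation above is a straight lookup into the three-dimensional next_action_state table: [domain][current state][action type] yields the next state, and any combination not listed in the initializer stays zero-initialized, which corresponds to DR_ACTION_STATE_ERR (the first enumerator, judging by the ERR check in the helper) and therefore rejects the sequence. As a minimal sketch, not part of this patch, here is how an ordered list of action types could be checked with the same helper; the function name is made up, the enums come from earlier in this file:

static bool example_action_seq_is_valid(enum dr_action_domain domain,
					const u32 *action_types, int num)
{
	u32 state = DR_ACTION_STATE_NO_ACTION;
	int i;

	for (i = 0; i < num; i++) {
		/* e.g. a second DR_ACTION_TYP_FT after the rule already
		 * reached DR_ACTION_STATE_TERM maps to ERR and fails here.
		 */
		if (dr_action_validate_and_get_next_state(domain,
							  action_types[i],
							  &state))
			return false;
	}

	return true;
}
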
+
+static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *dest_action,
+ u64 *final_icm_addr)
+{
+ int ret;
+
+ switch (dest_action->action_type) {
+ case DR_ACTION_TYP_FT:
+ /* Allow destination flow table only if table is a terminating
+ * table, since there is an *assumption* that in such case FW
+ * will recalculate the CS.
+ */
+ if (dest_action->dest_tbl->is_fw_tbl) {
+ *final_icm_addr = dest_action->dest_tbl->fw_tbl.rx_icm_addr;
+ } else {
+ mlx5dr_dbg(dmn,
+ "Destination FT should be terminating when modify TTL is used\n");
+ return -EINVAL;
+ }
+ break;
+
+ case DR_ACTION_TYP_VPORT:
+ /* If destination is vport we will get the FW flow table
+ * that recalculates the CS and forwards to the vport.
+ */
+ ret = mlx5dr_domain_get_recalc_cs_ft_addr(dest_action->vport->dmn,
+ dest_action->vport->caps->num,
+ final_icm_addr);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n");
+ return ret;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_actions_attr *attr,
+ bool rx_rule,
+ bool *recalc_cs_required)
+{
+ *recalc_cs_required = false;
+
+ /* if device supports csum recalculation - no adjustment needed */
+ if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps))
+ return;
+
+ /* no adjustment needed on TX rules */
+ if (!rx_rule)
+ return;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) {
+ /* Ignore the modify TTL action.
+ * It is always kept as last HW action.
+ */
+ attr->modify_actions--;
+ return;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+ /* Due to a HW bug on some devices, modifying TTL on RX flows
+ * will cause an incorrect checksum calculation. In such cases
+ * we will use a FW table to recalculate the checksum.
+ */
+ *recalc_cs_required = true;
+}
+
+static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *actions[],
+ int last_idx)
+{
+ int i;
+
+ for (i = 0; i <= last_idx; i++)
+ mlx5dr_err(dmn, "< %s (%d) > ",
+ dr_action_id_to_str(actions[i]->action_type),
+ actions[i]->action_type);
+}
+
+#define WITH_VLAN_NUM_HW_ACTIONS 6
+
+int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_action *actions[],
+ u32 num_actions,
+ u8 *ste_arr,
+ u32 *new_hw_ste_arr_sz)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ bool rx_rule = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ u8 action_type_set[DR_ACTION_TYP_MAX] = {};
+ struct mlx5dr_ste_actions_attr attr = {};
+ struct mlx5dr_action *dest_action = NULL;
+ u32 state = DR_ACTION_STATE_NO_ACTION;
+ enum dr_action_domain action_domain;
+ bool recalc_cs_required = false;
+ u8 *last_ste;
+ int i, ret;
+
+ attr.gvmi = dmn->info.caps.gvmi;
+ attr.hit_gvmi = dmn->info.caps.gvmi;
+ attr.final_icm_addr = nic_dmn->default_icm_addr;
+ action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type);
+
+ for (i = 0; i < num_actions; i++) {
+ struct mlx5dr_action_dest_tbl *dest_tbl;
+ struct mlx5dr_icm_chunk *chunk;
+ struct mlx5dr_action *action;
+ int max_actions_type = 1;
+ u32 action_type;
+
+ action = actions[i];
+ action_type = action->action_type;
+
+ switch (action_type) {
+ case DR_ACTION_TYP_DROP:
+ attr.final_icm_addr = nic_dmn->drop_icm_addr;
+ attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48;
+ break;
+ case DR_ACTION_TYP_FT:
+ dest_action = action;
+ dest_tbl = action->dest_tbl;
+ if (!dest_tbl->is_fw_tbl) {
+ if (dest_tbl->tbl->dmn != dmn) {
+ mlx5dr_err(dmn,
+ "Destination table belongs to a different domain\n");
+ return -EINVAL;
+ }
+ if (dest_tbl->tbl->level <= matcher->tbl->level) {
+ mlx5_core_dbg_once(dmn->mdev,
+ "Connecting table to a lower/same level destination table\n");
+ mlx5dr_dbg(dmn,
+ "Connecting table at level %d to a destination table at level %d\n",
+ matcher->tbl->level,
+ dest_tbl->tbl->level);
+ }
+ chunk = rx_rule ? dest_tbl->tbl->rx.s_anchor->chunk :
+ dest_tbl->tbl->tx.s_anchor->chunk;
+ attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk);
+ } else {
+ struct mlx5dr_cmd_query_flow_table_details output;
+ int ret;
+
+ /* get the relevant addresses */
+ if (!action->dest_tbl->fw_tbl.rx_icm_addr) {
+ ret = mlx5dr_cmd_query_flow_table(dmn->mdev,
+ dest_tbl->fw_tbl.type,
+ dest_tbl->fw_tbl.id,
+ &output);
+ if (!ret) {
+ dest_tbl->fw_tbl.tx_icm_addr =
+ output.sw_owner_icm_root_1;
+ dest_tbl->fw_tbl.rx_icm_addr =
+ output.sw_owner_icm_root_0;
+ } else {
+ mlx5dr_err(dmn,
+ "Failed mlx5_cmd_query_flow_table ret: %d\n",
+ ret);
+ return ret;
+ }
+ }
+ attr.final_icm_addr = rx_rule ?
+ dest_tbl->fw_tbl.rx_icm_addr :
+ dest_tbl->fw_tbl.tx_icm_addr;
+ }
+ break;
+ case DR_ACTION_TYP_QP:
+ mlx5dr_info(dmn, "Domain doesn't support QP\n");
+ return -EOPNOTSUPP;
+ case DR_ACTION_TYP_CTR:
+ attr.ctr_id = action->ctr->ctr_id +
+ action->ctr->offset;
+ break;
+ case DR_ACTION_TYP_TAG:
+ attr.flow_tag = action->flow_tag->flow_tag;
+ break;
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ break;
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ attr.decap_index = action->rewrite->index;
+ attr.decap_actions = action->rewrite->num_of_actions;
+ attr.decap_with_vlan =
+ attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
+ break;
+ case DR_ACTION_TYP_MODIFY_HDR:
+ attr.modify_index = action->rewrite->index;
+ attr.modify_actions = action->rewrite->num_of_actions;
+ if (action->rewrite->modify_ttl)
+ dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
+ &recalc_cs_required);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ if (rx_rule &&
+ !(dmn->ste_ctx->actions_caps & DR_STE_CTX_ACTION_CAP_RX_ENCAP)) {
+ mlx5dr_info(dmn, "Device doesn't support Encap on RX\n");
+ return -EOPNOTSUPP;
+ }
+ attr.reformat.size = action->reformat->size;
+ attr.reformat.id = action->reformat->id;
+ break;
+ case DR_ACTION_TYP_SAMPLER:
+ attr.final_icm_addr = rx_rule ? action->sampler->rx_icm_addr :
+ action->sampler->tx_icm_addr;
+ break;
+ case DR_ACTION_TYP_VPORT:
+ if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) {
+ /* can't go to uplink on RX rule - dropping instead */
+ attr.final_icm_addr = nic_dmn->drop_icm_addr;
+ attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48;
+ } else {
+ attr.hit_gvmi = action->vport->caps->vhca_gvmi;
+ dest_action = action;
+ attr.final_icm_addr = rx_rule ?
+ action->vport->caps->icm_address_rx :
+ action->vport->caps->icm_address_tx;
+ }
+ break;
+ case DR_ACTION_TYP_POP_VLAN:
+ if (!rx_rule && !(dmn->ste_ctx->actions_caps &
+ DR_STE_CTX_ACTION_CAP_TX_POP)) {
+ mlx5dr_dbg(dmn, "Device doesn't support POP VLAN action on TX\n");
+ return -EOPNOTSUPP;
+ }
+
+ max_actions_type = MLX5DR_MAX_VLANS;
+ attr.vlans.count++;
+ break;
+ case DR_ACTION_TYP_PUSH_VLAN:
+ if (rx_rule && !(dmn->ste_ctx->actions_caps &
+ DR_STE_CTX_ACTION_CAP_RX_PUSH)) {
+ mlx5dr_dbg(dmn, "Device doesn't support PUSH VLAN action on RX\n");
+ return -EOPNOTSUPP;
+ }
+
+ max_actions_type = MLX5DR_MAX_VLANS;
+ if (attr.vlans.count == MLX5DR_MAX_VLANS) {
+ mlx5dr_dbg(dmn, "Max VLAN push/pop count exceeded\n");
+ return -EINVAL;
+ }
+
+ attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr;
+ break;
+ case DR_ACTION_TYP_INSERT_HDR:
+ case DR_ACTION_TYP_REMOVE_HDR:
+ attr.reformat.size = action->reformat->size;
+ attr.reformat.id = action->reformat->id;
+ attr.reformat.param_0 = action->reformat->param_0;
+ attr.reformat.param_1 = action->reformat->param_1;
+ break;
+ case DR_ACTION_TYP_ASO_FLOW_METER:
+ attr.aso_flow_meter.obj_id = action->aso->obj_id;
+ attr.aso_flow_meter.offset = action->aso->offset;
+ attr.aso_flow_meter.dest_reg_id = action->aso->dest_reg_id;
+ attr.aso_flow_meter.init_color = action->aso->init_color;
+ break;
+ default:
+ mlx5dr_err(dmn, "Unsupported action type %d\n", action_type);
+ return -EINVAL;
+ }
+
+ /* Check action duplication */
+ if (++action_type_set[action_type] > max_actions_type) {
+ mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n",
+ action_type, max_actions_type);
+ return -EINVAL;
+ }
+
+ /* Check action state machine is valid */
+ if (dr_action_validate_and_get_next_state(action_domain,
+ action_type,
+ &state)) {
+ mlx5dr_err(dmn, "Invalid action (gvmi: %d, is_rx: %d) sequence provided:",
+ attr.gvmi, rx_rule);
+ dr_action_print_sequence(dmn, actions, i);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ *new_hw_ste_arr_sz = nic_matcher->num_of_builders;
+ last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
+
+ if (recalc_cs_required && dest_action) {
+ ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
+ if (ret) {
+ mlx5dr_err(dmn,
+ "Failed to handle checksum recalculation err %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ dr_actions_apply(dmn,
+ nic_dmn->type,
+ action_type_set,
+ last_ste,
+ &attr,
+ new_hw_ste_arr_sz);
+
+ return 0;
+}
+
+static unsigned int action_size[DR_ACTION_TYP_MAX] = {
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = sizeof(struct mlx5dr_action_rewrite),
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_FT] = sizeof(struct mlx5dr_action_dest_tbl),
+ [DR_ACTION_TYP_CTR] = sizeof(struct mlx5dr_action_ctr),
+ [DR_ACTION_TYP_TAG] = sizeof(struct mlx5dr_action_flow_tag),
+ [DR_ACTION_TYP_MODIFY_HDR] = sizeof(struct mlx5dr_action_rewrite),
+ [DR_ACTION_TYP_VPORT] = sizeof(struct mlx5dr_action_vport),
+ [DR_ACTION_TYP_PUSH_VLAN] = sizeof(struct mlx5dr_action_push_vlan),
+ [DR_ACTION_TYP_INSERT_HDR] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat),
+ [DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler),
+ [DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter),
+};
+
+static struct mlx5dr_action *
+dr_action_create_generic(enum mlx5dr_action_type action_type)
+{
+ struct mlx5dr_action *action;
+ int extra_size;
+
+ if (action_type < DR_ACTION_TYP_MAX)
+ extra_size = action_size[action_type];
+ else
+ return NULL;
+
+ action = kzalloc(sizeof(*action) + extra_size, GFP_KERNEL);
+ if (!action)
+ return NULL;
+
+ action->action_type = action_type;
+ refcount_set(&action->refcount, 1);
+ action->data = action + 1;
+
+ return action;
+}
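
dr_action_create_generic() uses the common "header plus trailing payload" allocation idiom: a single kzalloc() covers the fixed struct mlx5dr_action and the per-type data sized from action_size[], and action->data is pointed one struct past the header (action + 1). A stand-alone user-space sketch of the same idiom, with made-up names and nothing mlx5-specific:

#include <stdlib.h>

struct obj {
	int type;
	void *data;	/* points at the trailing per-type payload */
};

static struct obj *obj_create(int type, size_t extra)
{
	/* one zeroed allocation: header immediately followed by payload */
	struct obj *o = calloc(1, sizeof(*o) + extra);

	if (!o)
		return NULL;

	o->type = type;
	o->data = o + 1;	/* first byte right after the header */
	return o;
}
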
+
+struct mlx5dr_action *mlx5dr_action_create_drop(void)
+{
+ return dr_action_create_generic(DR_ACTION_TYP_DROP);
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ return NULL;
+
+ action->dest_tbl->is_fw_tbl = true;
+ action->dest_tbl->fw_tbl.dmn = dmn;
+ action->dest_tbl->fw_tbl.id = table_num;
+ action->dest_tbl->fw_tbl.type = FS_FT_FDB;
+ refcount_inc(&dmn->refcount);
+
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl)
+{
+ struct mlx5dr_action *action;
+
+ refcount_inc(&tbl->refcount);
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ goto dec_ref;
+
+ action->dest_tbl->tbl = tbl;
+
+ return action;
+
+dec_ref:
+ refcount_dec(&tbl->refcount);
+ return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action_dest *dests,
+ u32 num_of_dests,
+ bool ignore_flow_level,
+ u32 flow_source)
+{
+ struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
+ struct mlx5dr_action **ref_actions;
+ struct mlx5dr_action *action;
+ bool reformat_req = false;
+ u32 num_of_ref = 0;
+ u32 ref_act_cnt;
+ int ret;
+ int i;
+
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+ mlx5dr_err(dmn, "Multiple destination support is for FDB only\n");
+ return NULL;
+ }
+
+ hw_dests = kcalloc(num_of_dests, sizeof(*hw_dests), GFP_KERNEL);
+ if (!hw_dests)
+ return NULL;
+
+ if (unlikely(check_mul_overflow(num_of_dests, 2u, &ref_act_cnt)))
+ goto free_hw_dests;
+
+ ref_actions = kcalloc(ref_act_cnt, sizeof(*ref_actions), GFP_KERNEL);
+ if (!ref_actions)
+ goto free_hw_dests;
+
+ for (i = 0; i < num_of_dests; i++) {
+ struct mlx5dr_action *reformat_action = dests[i].reformat;
+ struct mlx5dr_action *dest_action = dests[i].dest;
+
+ ref_actions[num_of_ref++] = dest_action;
+
+ switch (dest_action->action_type) {
+ case DR_ACTION_TYP_VPORT:
+ hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ hw_dests[i].vport.num = dest_action->vport->caps->num;
+ hw_dests[i].vport.vhca_id = dest_action->vport->caps->vhca_gvmi;
+ if (reformat_action) {
+ reformat_req = true;
+ hw_dests[i].vport.reformat_id =
+ reformat_action->reformat->id;
+ ref_actions[num_of_ref++] = reformat_action;
+ hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ }
+ break;
+
+ case DR_ACTION_TYP_FT:
+ hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ if (dest_action->dest_tbl->is_fw_tbl)
+ hw_dests[i].ft_id = dest_action->dest_tbl->fw_tbl.id;
+ else
+ hw_dests[i].ft_id = dest_action->dest_tbl->tbl->table_id;
+ break;
+
+ default:
+ mlx5dr_dbg(dmn, "Invalid multiple destinations action\n");
+ goto free_ref_actions;
+ }
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ goto free_ref_actions;
+
+ ret = mlx5dr_fw_create_md_tbl(dmn,
+ hw_dests,
+ num_of_dests,
+ reformat_req,
+ &action->dest_tbl->fw_tbl.id,
+ &action->dest_tbl->fw_tbl.group_id,
+ ignore_flow_level,
+ flow_source);
+ if (ret)
+ goto free_action;
+
+ refcount_inc(&dmn->refcount);
+
+ for (i = 0; i < num_of_ref; i++)
+ refcount_inc(&ref_actions[i]->refcount);
+
+ action->dest_tbl->is_fw_tbl = true;
+ action->dest_tbl->fw_tbl.dmn = dmn;
+ action->dest_tbl->fw_tbl.type = FS_FT_FDB;
+ action->dest_tbl->fw_tbl.ref_actions = ref_actions;
+ action->dest_tbl->fw_tbl.num_of_ref_actions = num_of_ref;
+
+ kfree(hw_dests);
+
+ return action;
+
+free_action:
+ kfree(action);
+free_ref_actions:
+ kfree(ref_actions);
+free_hw_dests:
+ kfree(hw_dests);
+ return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ return NULL;
+
+ action->dest_tbl->is_fw_tbl = 1;
+ action->dest_tbl->fw_tbl.type = ft->type;
+ action->dest_tbl->fw_tbl.id = ft->id;
+ action->dest_tbl->fw_tbl.dmn = dmn;
+
+ refcount_inc(&dmn->refcount);
+
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_CTR);
+ if (!action)
+ return NULL;
+
+ action->ctr->ctr_id = counter_id;
+
+ return action;
+}
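
To show how these constructors are typically combined, here is a hedged caller-side sketch (not part of this patch) that builds a counter action and a destination-table action for a rule and releases them on error. The ordering follows the state machine above: the counter keeps the rule non-terminating, the destination table terminates it. mlx5dr_rule_create(), declared elsewhere in the steering API, would consume the resulting array; the helper name below is made up.

static int example_build_rule_actions(struct mlx5dr_table *tbl, u32 counter_id,
				      struct mlx5dr_action *actions[2])
{
	actions[0] = mlx5dr_action_create_flow_counter(counter_id);
	actions[1] = mlx5dr_action_create_dest_table(tbl);
	if (!actions[0] || !actions[1])
		goto err_destroy;

	/* ... hand 'actions' (num_actions = 2) to mlx5dr_rule_create() ... */
	return 0;

err_destroy:
	if (actions[1])
		mlx5dr_action_destroy(actions[1]);
	if (actions[0])
		mlx5dr_action_destroy(actions[0]);
	return -ENOMEM;
}
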
+
+struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_TAG);
+ if (!action)
+ return NULL;
+
+ action->flow_tag->flow_tag = tag_value & 0xffffff;
+
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id)
+{
+ struct mlx5dr_action *action;
+ u64 icm_rx, icm_tx;
+ int ret;
+
+ ret = mlx5dr_cmd_query_flow_sampler(dmn->mdev, sampler_id,
+ &icm_rx, &icm_tx);
+ if (ret)
+ return NULL;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_SAMPLER);
+ if (!action)
+ return NULL;
+
+ action->sampler->dmn = dmn;
+ action->sampler->sampler_id = sampler_id;
+ action->sampler->rx_icm_addr = icm_rx;
+ action->sampler->tx_icm_addr = icm_tx;
+
+ refcount_inc(&dmn->refcount);
+ return action;
+}
+
+static int
+dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
+ struct mlx5dr_domain *dmn,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
+ size_t data_sz,
+ void *data)
+{
+ if (reformat_type == DR_ACTION_TYP_INSERT_HDR) {
+ if ((!data && data_sz) || (data && !data_sz) ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_size) < data_sz ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_offset) < reformat_param_1) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters for INSERT_HDR\n");
+ goto out_err;
+ }
+ } else if (reformat_type == DR_ACTION_TYP_REMOVE_HDR) {
+ if (data ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_size) < data_sz ||
+ MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_offset) < reformat_param_1) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters for REMOVE_HDR\n");
+ goto out_err;
+ }
+ } else if (reformat_param_0 || reformat_param_1 ||
+ reformat_type > DR_ACTION_TYP_REMOVE_HDR) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameters\n");
+ goto out_err;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+ return 0;
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+ if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 &&
+ reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) {
+ mlx5dr_dbg(dmn, "Action reformat type not support on RX domain\n");
+ goto out_err;
+ }
+ } else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+ if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 &&
+ reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) {
+ mlx5dr_dbg(dmn, "Action reformat type not support on TX domain\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -EINVAL;
+}
+
+#define ACTION_CACHE_LINE_SIZE 64
+
+static int
+dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
+ u8 reformat_param_0, u8 reformat_param_1,
+ size_t data_sz, void *data,
+ struct mlx5dr_action *action)
+{
+ u32 reformat_id;
+ int ret;
+
+ switch (action->action_type) {
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ {
+ enum mlx5_reformat_ctx_type rt;
+
+ if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2)
+ rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ else
+ rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+
+ ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, 0, 0,
+ data_sz, data,
+ &reformat_id);
+ if (ret)
+ return ret;
+
+ action->reformat->id = reformat_id;
+ action->reformat->size = data_sz;
+ return 0;
+ }
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ {
+ return 0;
+ }
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ {
+ u8 *hw_actions;
+ int ret;
+
+ hw_actions = kzalloc(ACTION_CACHE_LINE_SIZE, GFP_KERNEL);
+ if (!hw_actions)
+ return -ENOMEM;
+
+ ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx,
+ data, data_sz,
+ hw_actions,
+ ACTION_CACHE_LINE_SIZE,
+ &action->rewrite->num_of_actions);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n");
+ kfree(hw_actions);
+ return ret;
+ }
+
+ action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+ DR_CHUNK_SIZE_8);
+ if (!action->rewrite->chunk) {
+ mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n");
+ kfree(hw_actions);
+ return -ENOMEM;
+ }
+
+ action->rewrite->data = (void *)hw_actions;
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr
+ (action->rewrite->chunk) -
+ dmn->info.caps.hdr_modify_icm_addr) /
+ ACTION_CACHE_LINE_SIZE;
+
+ ret = mlx5dr_send_postsend_action(dmn, action);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n");
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ kfree(hw_actions);
+ return ret;
+ }
+ return 0;
+ }
+ case DR_ACTION_TYP_INSERT_HDR:
+ ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev,
+ MLX5_REFORMAT_TYPE_INSERT_HDR,
+ reformat_param_0,
+ reformat_param_1,
+ data_sz, data,
+ &reformat_id);
+ if (ret)
+ return ret;
+
+ action->reformat->id = reformat_id;
+ action->reformat->size = data_sz;
+ action->reformat->param_0 = reformat_param_0;
+ action->reformat->param_1 = reformat_param_1;
+ return 0;
+ case DR_ACTION_TYP_REMOVE_HDR:
+ action->reformat->id = 0;
+ action->reformat->size = data_sz;
+ action->reformat->param_0 = reformat_param_0;
+ action->reformat->param_1 = reformat_param_1;
+ return 0;
+ default:
+ mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
+ return -EINVAL;
+ }
+}
+
+#define CVLAN_ETHERTYPE 0x8100
+#define SVLAN_ETHERTYPE 0x88a8
+
+struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void)
+{
+ return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN);
+}
+
+struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn,
+ __be32 vlan_hdr)
+{
+ u32 vlan_hdr_h = ntohl(vlan_hdr);
+ u16 ethertype = vlan_hdr_h >> 16;
+ struct mlx5dr_action *action;
+
+ if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) {
+ mlx5dr_dbg(dmn, "Invalid vlan ethertype\n");
+ return NULL;
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN);
+ if (!action)
+ return NULL;
+
+ action->push_vlan->vlan_hdr = vlan_hdr_h;
+ return action;
+}
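
mlx5dr_action_create_push_vlan() expects the full 32-bit VLAN header in network byte order: the ethertype in the upper 16 bits and the TCI (PCP/DEI/VID) in the lower 16, which is why the function above recovers the ethertype with ntohl() and a 16-bit shift. A small illustrative helper (not part of this patch; the name is made up) showing how a caller might compose that value:

static __be32 example_build_vlan_hdr(u16 ethertype, u8 prio, bool dei, u16 vid)
{
	u16 tci = (prio << 13) | (dei << 12) | (vid & 0x0fff);

	/* e.g. example_build_vlan_hdr(0x8100, 0, false, 100) for CVLAN, VID 100 */
	return htonl((u32)ethertype << 16 | tci);
}
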
+
+struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+ enum mlx5dr_action_reformat_type reformat_type,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
+ size_t data_sz,
+ void *data)
+{
+ enum mlx5dr_action_type action_type;
+ struct mlx5dr_action *action;
+ int ret;
+
+ refcount_inc(&dmn->refcount);
+
+ /* General checks */
+ ret = dr_action_reformat_to_action_type(reformat_type, &action_type);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Invalid reformat_type provided\n");
+ goto dec_ref;
+ }
+
+ ret = dr_action_verify_reformat_params(action_type, dmn,
+ reformat_param_0, reformat_param_1,
+ data_sz, data);
+ if (ret)
+ goto dec_ref;
+
+ action = dr_action_create_generic(action_type);
+ if (!action)
+ goto dec_ref;
+
+ action->reformat->dmn = dmn;
+
+ ret = dr_action_create_reformat_action(dmn,
+ reformat_param_0,
+ reformat_param_1,
+ data_sz,
+ data,
+ action);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret);
+ goto free_action;
+ }
+
+ return action;
+
+free_action:
+ kfree(action);
+dec_ref:
+ refcount_dec(&dmn->refcount);
+ return NULL;
+}
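
A short, hedged usage sketch for the reformat entry point above: creating an encap (L2-to-tunnel-L2) action from a caller-built header blob. reformat_param_0/1 are only meaningful for INSERT_HDR/REMOVE_HDR, so they are passed as zero; the helper name and the idea that encap_hdr already holds the outer headers are assumptions for illustration.

static struct mlx5dr_action *
example_create_encap_action(struct mlx5dr_domain *dmn,
			    void *encap_hdr, size_t encap_hdr_sz)
{
	return mlx5dr_action_create_packet_reformat(dmn,
						    DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2,
						    0, 0, /* unused for encap */
						    encap_hdr_sz, encap_hdr);
}
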
+
+static int
+dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn,
+ __be64 *sw_action,
+ __be64 *hw_action,
+ const struct mlx5dr_ste_action_modify_field **ret_hw_info)
+{
+ const struct mlx5dr_ste_action_modify_field *hw_action_info;
+ u8 max_length;
+ u16 sw_field;
+ u32 data;
+
+ /* Get SW modify action data */
+ sw_field = MLX5_GET(set_action_in, sw_action, field);
+ data = MLX5_GET(set_action_in, sw_action, data);
+
+ /* Convert SW data to HW modify action format */
+ hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field);
+ if (!hw_action_info) {
+ mlx5dr_dbg(dmn, "Modify add action invalid field given\n");
+ return -EINVAL;
+ }
+
+ max_length = hw_action_info->end - hw_action_info->start + 1;
+
+ mlx5dr_ste_set_action_add(dmn->ste_ctx,
+ hw_action,
+ hw_action_info->hw_field,
+ hw_action_info->start,
+ max_length,
+ data);
+
+ *ret_hw_info = hw_action_info;
+
+ return 0;
+}
+
+static int
+dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn,
+ __be64 *sw_action,
+ __be64 *hw_action,
+ const struct mlx5dr_ste_action_modify_field **ret_hw_info)
+{
+ const struct mlx5dr_ste_action_modify_field *hw_action_info;
+ u8 offset, length, max_length;
+ u16 sw_field;
+ u32 data;
+
+ /* Get SW modify action data */
+ length = MLX5_GET(set_action_in, sw_action, length);
+ offset = MLX5_GET(set_action_in, sw_action, offset);
+ sw_field = MLX5_GET(set_action_in, sw_action, field);
+ data = MLX5_GET(set_action_in, sw_action, data);
+
+ /* Convert SW data to HW modify action format */
+ hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field);
+ if (!hw_action_info) {
+ mlx5dr_dbg(dmn, "Modify set action invalid field given\n");
+ return -EINVAL;
+ }
+
+ /* Per the PRM, length zero means a length of 32 bits */
+ length = length ? length : 32;
+
+ max_length = hw_action_info->end - hw_action_info->start + 1;
+
+ if (length + offset > max_length) {
+ mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n");
+ return -EINVAL;
+ }
+
+ mlx5dr_ste_set_action_set(dmn->ste_ctx,
+ hw_action,
+ hw_action_info->hw_field,
+ hw_action_info->start + offset,
+ length,
+ data);
+
+ *ret_hw_info = hw_action_info;
+
+ return 0;
+}
+
+static int
+dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn,
+ __be64 *sw_action,
+ __be64 *hw_action,
+ const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info,
+ const struct mlx5dr_ste_action_modify_field **ret_src_hw_info)
+{
+ u8 src_offset, dst_offset, src_max_length, dst_max_length, length;
+ const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
+ const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
+ u16 src_field, dst_field;
+
+ /* Get SW modify action data */
+ src_field = MLX5_GET(copy_action_in, sw_action, src_field);
+ dst_field = MLX5_GET(copy_action_in, sw_action, dst_field);
+ src_offset = MLX5_GET(copy_action_in, sw_action, src_offset);
+ dst_offset = MLX5_GET(copy_action_in, sw_action, dst_offset);
+ length = MLX5_GET(copy_action_in, sw_action, length);
+
+ /* Convert SW data to HW modify action format */
+ hw_src_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, src_field);
+ hw_dst_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, dst_field);
+ if (!hw_src_action_info || !hw_dst_action_info) {
+ mlx5dr_dbg(dmn, "Modify copy action invalid field given\n");
+ return -EINVAL;
+ }
+
+ /* Per the PRM, length zero means a length of 32 bits */
+ length = length ? length : 32;
+
+ src_max_length = hw_src_action_info->end -
+ hw_src_action_info->start + 1;
+ dst_max_length = hw_dst_action_info->end -
+ hw_dst_action_info->start + 1;
+
+ if (length + src_offset > src_max_length ||
+ length + dst_offset > dst_max_length) {
+ mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n");
+ return -EINVAL;
+ }
+
+ mlx5dr_ste_set_action_copy(dmn->ste_ctx,
+ hw_action,
+ hw_dst_action_info->hw_field,
+ hw_dst_action_info->start + dst_offset,
+ length,
+ hw_src_action_info->hw_field,
+ hw_src_action_info->start + src_offset);
+
+ *ret_dst_hw_info = hw_dst_action_info;
+ *ret_src_hw_info = hw_src_action_info;
+
+ return 0;
+}
+
+static int
+dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn,
+ __be64 *sw_action,
+ __be64 *hw_action,
+ const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info,
+ const struct mlx5dr_ste_action_modify_field **ret_src_hw_info)
+{
+ u8 action;
+ int ret;
+
+ *hw_action = 0;
+ *ret_src_hw_info = NULL;
+
+ /* Get SW modify action type */
+ action = MLX5_GET(set_action_in, sw_action, action_type);
+
+ switch (action) {
+ case MLX5_ACTION_TYPE_SET:
+ ret = dr_action_modify_sw_to_hw_set(dmn, sw_action,
+ hw_action,
+ ret_dst_hw_info);
+ break;
+
+ case MLX5_ACTION_TYPE_ADD:
+ ret = dr_action_modify_sw_to_hw_add(dmn, sw_action,
+ hw_action,
+ ret_dst_hw_info);
+ break;
+
+ case MLX5_ACTION_TYPE_COPY:
+ ret = dr_action_modify_sw_to_hw_copy(dmn, sw_action,
+ hw_action,
+ ret_dst_hw_info,
+ ret_src_hw_info);
+ break;
+
+ default:
+ mlx5dr_info(dmn, "Unsupported action_type for modify action\n");
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
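
The "SW actions" parsed above are the 64-bit modify-header descriptors laid out by the set_action_in/copy_action_in formats from mlx5_ifc. As a hedged sketch of the caller side (this is normally done by the flow-steering layers that feed this driver, not in this file), here is how one SET descriptor rewriting the IP TTL could be built. Note that length 0 would mean a full 32-bit write per the PRM, which the 8-bit TTL field cannot accommodate, so the length is set explicitly; the helper name is made up.

static void example_build_set_ttl(__be64 *sw_action, u8 new_ttl)
{
	MLX5_SET(set_action_in, sw_action, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, sw_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
	MLX5_SET(set_action_in, sw_action, offset, 0);
	MLX5_SET(set_action_in, sw_action, length, 8);
	MLX5_SET(set_action_in, sw_action, data, new_ttl);
}
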
+
+static int
+dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action,
+ const __be64 *sw_action)
+{
+ u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+
+ if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
+ action->rewrite->allow_rx = 0;
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
+ sw_field);
+ return -EINVAL;
+ }
+ } else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
+ action->rewrite->allow_tx = 0;
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
+ sw_field);
+ return -EINVAL;
+ }
+ }
+
+ if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) {
+ mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action,
+ const __be64 *sw_action)
+{
+ u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+
+ if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL &&
+ sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT &&
+ sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM &&
+ sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for add action\n",
+ sw_field);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action,
+ const __be64 *sw_action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ u16 sw_fields[2];
+ int i;
+
+ sw_fields[0] = MLX5_GET(copy_action_in, sw_action, src_field);
+ sw_fields[1] = MLX5_GET(copy_action_in, sw_action, dst_field);
+
+ for (i = 0; i < 2; i++) {
+ if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
+ action->rewrite->allow_rx = 0;
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
+ sw_fields[i]);
+ return -EINVAL;
+ }
+ } else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
+ action->rewrite->allow_tx = 0;
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
+ sw_fields[i]);
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) {
+ mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+dr_action_modify_check_field_limitation(struct mlx5dr_action *action,
+ const __be64 *sw_action)
+{
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ u8 action_type;
+ int ret;
+
+ action_type = MLX5_GET(set_action_in, sw_action, action_type);
+
+ switch (action_type) {
+ case MLX5_ACTION_TYPE_SET:
+ ret = dr_action_modify_check_set_field_limitation(action,
+ sw_action);
+ break;
+
+ case MLX5_ACTION_TYPE_ADD:
+ ret = dr_action_modify_check_add_field_limitation(action,
+ sw_action);
+ break;
+
+ case MLX5_ACTION_TYPE_COPY:
+ ret = dr_action_modify_check_copy_field_limitation(action,
+ sw_action);
+ break;
+
+ default:
+ mlx5dr_info(dmn, "Unsupported action %d modify action\n",
+ action_type);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static bool
+dr_action_modify_check_is_ttl_modify(const void *sw_action)
+{
+ u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
+
+ return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL;
+}
+
+static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
+ u32 max_hw_actions,
+ u32 num_sw_actions,
+ __be64 sw_actions[],
+ __be64 hw_actions[],
+ u32 *num_hw_actions,
+ bool *modify_ttl)
+{
+ const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
+ const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
+ struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ __be64 *modify_ttl_sw_action = NULL;
+ int ret, i, hw_idx = 0;
+ __be64 *sw_action;
+ __be64 hw_action;
+ u16 hw_field = 0;
+ u32 l3_type = 0;
+ u32 l4_type = 0;
+
+ *modify_ttl = false;
+
+ action->rewrite->allow_rx = 1;
+ action->rewrite->allow_tx = 1;
+
+ for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) {
+ /* modify TTL is handled separately, as a last action */
+ if (i == num_sw_actions) {
+ sw_action = modify_ttl_sw_action;
+ modify_ttl_sw_action = NULL;
+ } else {
+ sw_action = &sw_actions[i];
+ }
+
+ ret = dr_action_modify_check_field_limitation(action,
+ sw_action);
+ if (ret)
+ return ret;
+
+ if (!(*modify_ttl) &&
+ dr_action_modify_check_is_ttl_modify(sw_action)) {
+ modify_ttl_sw_action = sw_action;
+ *modify_ttl = true;
+ continue;
+ }
+
+ /* Convert SW action to HW action */
+ ret = dr_action_modify_sw_to_hw(dmn,
+ sw_action,
+ &hw_action,
+ &hw_dst_action_info,
+ &hw_src_action_info);
+ if (ret)
+ return ret;
+
+ /* Due to a HW limitation we cannot modify 2 different L3 types */
+ if (l3_type && hw_dst_action_info->l3_type &&
+ hw_dst_action_info->l3_type != l3_type) {
+ mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n");
+ return -EINVAL;
+ }
+ if (hw_dst_action_info->l3_type)
+ l3_type = hw_dst_action_info->l3_type;
+
+ /* Due to a HW limitation we cannot modify two different L4 types */
+ if (l4_type && hw_dst_action_info->l4_type &&
+ hw_dst_action_info->l4_type != l4_type) {
+ mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n");
+ return -EINVAL;
+ }
+ if (hw_dst_action_info->l4_type)
+ l4_type = hw_dst_action_info->l4_type;
+
+ /* HW reads and executes two actions at once; this means we
+ * need to create a gap if two consecutive actions access the
+ * same field.
+ */
+ if ((hw_idx % 2) && (hw_field == hw_dst_action_info->hw_field ||
+ (hw_src_action_info &&
+ hw_field == hw_src_action_info->hw_field))) {
+ /* Check that after gap insertion the total number of HW
+ * modify actions doesn't exceed the limit
+ */
+ hw_idx++;
+ if (hw_idx >= max_hw_actions) {
+ mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n");
+ return -EINVAL;
+ }
+ }
+ hw_field = hw_dst_action_info->hw_field;
+
+ hw_actions[hw_idx] = hw_action;
+ hw_idx++;
+ }
+
+ /* if the resulting HW actions list is empty, add NOP action */
+ if (!hw_idx)
+ hw_idx++;
+
+ *num_hw_actions = hw_idx;
+
+ return 0;
+}
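
A worked example of the gap rule above (editor's illustration, assuming three SET actions that all touch the same HW field F): the hardware executes hw_actions in pairs, so whenever the action about to land in the odd slot of a pair touches the same field as the previous one, a NOP slot is left in between:

  sw action #0: SET F  -> hw_actions[0]                      (hw_field = F)
  sw action #1: SET F  -> slot 1 is odd and conflicts with F -> NOP at [1],
                          emitted at hw_actions[2]
  sw action #2: SET F  -> slot 3 is odd and conflicts with F -> NOP at [3],
                          emitted at hw_actions[4]           (num_hw_actions = 5)
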
+
+static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
+ size_t actions_sz,
+ __be64 actions[],
+ struct mlx5dr_action *action)
+{
+ struct mlx5dr_icm_chunk *chunk;
+ u32 max_hw_actions;
+ u32 num_hw_actions;
+ u32 num_sw_actions;
+ __be64 *hw_actions;
+ bool modify_ttl;
+ int ret;
+
+ num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE;
+ max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16);
+
+ if (num_sw_actions > max_hw_actions) {
+ mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n",
+ num_sw_actions, max_hw_actions);
+ return -EINVAL;
+ }
+
+ chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16);
+ if (!chunk)
+ return -ENOMEM;
+
+ hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL);
+ if (!hw_actions) {
+ ret = -ENOMEM;
+ goto free_chunk;
+ }
+
+ ret = dr_actions_convert_modify_header(action,
+ max_hw_actions,
+ num_sw_actions,
+ actions,
+ hw_actions,
+ &num_hw_actions,
+ &modify_ttl);
+ if (ret)
+ goto free_hw_actions;
+
+ action->rewrite->chunk = chunk;
+ action->rewrite->modify_ttl = modify_ttl;
+ action->rewrite->data = (u8 *)hw_actions;
+ action->rewrite->num_of_actions = num_hw_actions;
+ action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) -
+ dmn->info.caps.hdr_modify_icm_addr) /
+ ACTION_CACHE_LINE_SIZE;
+
+ ret = mlx5dr_send_postsend_action(dmn, action);
+ if (ret)
+ goto free_hw_actions;
+
+ return 0;
+
+free_hw_actions:
+ kfree(hw_actions);
+free_chunk:
+ mlx5dr_icm_free_chunk(chunk);
+ return ret;
+}
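
To make the index arithmetic concrete (numbers are illustrative only): the rewrite index is the chunk's offset from the header-modify ICM base counted in ACTION_CACHE_LINE_SIZE (64-byte) lines, so a chunk sitting at ICM address base + 0x400 yields index 0x400 / 64 = 16, which is the value later encoded into the STE.
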
+
+struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn,
+ u32 flags,
+ size_t actions_sz,
+ __be64 actions[])
+{
+ struct mlx5dr_action *action;
+ int ret = 0;
+
+ refcount_inc(&dmn->refcount);
+
+ if (actions_sz % DR_MODIFY_ACTION_SIZE) {
+ mlx5dr_dbg(dmn, "Invalid modify actions size provided\n");
+ goto dec_ref;
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR);
+ if (!action)
+ goto dec_ref;
+
+ action->rewrite->dmn = dmn;
+
+ ret = dr_action_create_modify_action(dmn,
+ actions_sz,
+ actions,
+ action);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret);
+ goto free_action;
+ }
+
+ return action;
+
+free_action:
+ kfree(action);
+dec_ref:
+ refcount_dec(&dmn->refcount);
+ return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
+ u16 vport, u8 vhca_id_valid,
+ u16 vhca_id)
+{
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ struct mlx5dr_domain *vport_dmn;
+ struct mlx5dr_action *action;
+ u8 peer_vport;
+
+ peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi);
+ vport_dmn = peer_vport ? dmn->peer_dmn : dmn;
+ if (!vport_dmn) {
+ mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n");
+ return NULL;
+ }
+
+ if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+ mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n");
+ return NULL;
+ }
+
+ vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, vport);
+ if (!vport_cap) {
+ mlx5dr_err(dmn,
+ "Failed to get vport 0x%x caps - vport is disabled or invalid\n",
+ vport);
+ return NULL;
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_VPORT);
+ if (!action)
+ return NULL;
+
+ action->vport->dmn = vport_dmn;
+ action->vport->caps = vport_cap;
+
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id,
+ u8 dest_reg_id, u8 aso_type,
+ u8 init_color, u8 meter_id)
+{
+ struct mlx5dr_action *action;
+
+ if (aso_type != MLX5_EXE_ASO_FLOW_METER)
+ return NULL;
+
+ if (init_color > MLX5_FLOW_METER_COLOR_UNDEFINED)
+ return NULL;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_ASO_FLOW_METER);
+ if (!action)
+ return NULL;
+
+ action->aso->obj_id = obj_id;
+ action->aso->offset = meter_id;
+ action->aso->dest_reg_id = dest_reg_id;
+ action->aso->init_color = init_color;
+ action->aso->dmn = dmn;
+
+ refcount_inc(&dmn->refcount);
+
+ return action;
+}
+
+int mlx5dr_action_destroy(struct mlx5dr_action *action)
+{
+ if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1))
+ return -EBUSY;
+
+ switch (action->action_type) {
+ case DR_ACTION_TYP_FT:
+ if (action->dest_tbl->is_fw_tbl)
+ refcount_dec(&action->dest_tbl->fw_tbl.dmn->refcount);
+ else
+ refcount_dec(&action->dest_tbl->tbl->refcount);
+
+ if (action->dest_tbl->is_fw_tbl &&
+ action->dest_tbl->fw_tbl.num_of_ref_actions) {
+ struct mlx5dr_action **ref_actions;
+ int i;
+
+ ref_actions = action->dest_tbl->fw_tbl.ref_actions;
+ for (i = 0; i < action->dest_tbl->fw_tbl.num_of_ref_actions; i++)
+ refcount_dec(&ref_actions[i]->refcount);
+
+ kfree(ref_actions);
+
+ mlx5dr_fw_destroy_md_tbl(action->dest_tbl->fw_tbl.dmn,
+ action->dest_tbl->fw_tbl.id,
+ action->dest_tbl->fw_tbl.group_id);
+ }
+ break;
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ case DR_ACTION_TYP_REMOVE_HDR:
+ refcount_dec(&action->reformat->dmn->refcount);
+ break;
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ refcount_dec(&action->rewrite->dmn->refcount);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ case DR_ACTION_TYP_INSERT_HDR:
+ mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev,
+ action->reformat->id);
+ refcount_dec(&action->reformat->dmn->refcount);
+ break;
+ case DR_ACTION_TYP_MODIFY_HDR:
+ mlx5dr_icm_free_chunk(action->rewrite->chunk);
+ kfree(action->rewrite->data);
+ refcount_dec(&action->rewrite->dmn->refcount);
+ break;
+ case DR_ACTION_TYP_SAMPLER:
+ refcount_dec(&action->sampler->dmn->refcount);
+ break;
+ case DR_ACTION_TYP_ASO_FLOW_METER:
+ refcount_dec(&action->aso->dmn->refcount);
+ break;
+ default:
+ break;
+ }
+
+ kfree(action);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c
new file mode 100644
index 000000000..7df11a019
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 - 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006 - 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
+ */
+
+#include "dr_types.h"
+
+int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int max_order)
+{
+ int i;
+
+ buddy->max_order = max_order;
+
+ INIT_LIST_HEAD(&buddy->list_node);
+ INIT_LIST_HEAD(&buddy->used_list);
+ INIT_LIST_HEAD(&buddy->hot_list);
+
+ buddy->bitmap = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->bitmap),
+ GFP_KERNEL);
+ buddy->num_free = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->num_free),
+ GFP_KERNEL);
+
+ if (!buddy->bitmap || !buddy->num_free)
+ goto err_free_all;
+
+ /* Allocate one bitmap per order (max_order + 1 bitmaps in total) */
+
+ for (i = 0; i <= buddy->max_order; ++i) {
+ unsigned int size = 1 << (buddy->max_order - i);
+
+ buddy->bitmap[i] = bitmap_zalloc(size, GFP_KERNEL);
+ if (!buddy->bitmap[i])
+ goto err_out_free_each_bit_per_order;
+ }
+
+ /* In the beginning only the biggest order is available for use,
+ * so mark its first bit free in the bitmap and account for it in
+ * num_free.
+ */
+
+ bitmap_set(buddy->bitmap[buddy->max_order], 0, 1);
+
+ buddy->num_free[buddy->max_order] = 1;
+
+ return 0;
+
+err_out_free_each_bit_per_order:
+ for (i = 0; i <= buddy->max_order; ++i)
+ bitmap_free(buddy->bitmap[i]);
+
+err_free_all:
+ kfree(buddy->num_free);
+ kfree(buddy->bitmap);
+ return -ENOMEM;
+}
+
+void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ int i;
+
+ list_del(&buddy->list_node);
+
+ for (i = 0; i <= buddy->max_order; ++i)
+ bitmap_free(buddy->bitmap[i]);
+
+ kfree(buddy->num_free);
+ kfree(buddy->bitmap);
+}
+
+static int dr_buddy_find_free_seg(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int start_order,
+ unsigned int *segment,
+ unsigned int *order)
+{
+ unsigned int seg, order_iter, m;
+
+ for (order_iter = start_order;
+ order_iter <= buddy->max_order; ++order_iter) {
+ if (!buddy->num_free[order_iter])
+ continue;
+
+ m = 1 << (buddy->max_order - order_iter);
+ seg = find_first_bit(buddy->bitmap[order_iter], m);
+
+ if (WARN(seg >= m,
+ "ICM Buddy: failed finding free mem for order %d\n",
+ order_iter))
+ return -ENOMEM;
+
+ break;
+ }
+
+ if (order_iter > buddy->max_order)
+ return -ENOMEM;
+
+ *segment = seg;
+ *order = order_iter;
+ return 0;
+}
+
+/**
+ * mlx5dr_buddy_alloc_mem() - Allocate an ICM memory segment from the buddy.
+ * @buddy: Buddy to allocate from.
+ * @order: Requested allocation order (log2 of the number of entries).
+ * @segment: Returned segment number.
+ *
+ * This function finds the first free area of the ICM memory managed by this
+ * buddy. It uses the data structures of the buddy system to find a free
+ * area, starting from the requested order up to the maximum order in the
+ * system.
+ *
+ * Return: 0 when a segment is set, non-zero error status otherwise.
+ *
+ * On success, *@segment holds the location in the whole buddy ICM memory
+ * area - the index of the memory segment that is now available for use.
+ */
+int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int order,
+ unsigned int *segment)
+{
+ unsigned int seg, order_iter;
+ int err;
+
+ err = dr_buddy_find_free_seg(buddy, order, &seg, &order_iter);
+ if (err)
+ return err;
+
+ bitmap_clear(buddy->bitmap[order_iter], seg, 1);
+ --buddy->num_free[order_iter];
+
+ /* If the free memory we found is of a bigger order than required,
+ * split it down: at every order between the one we found and the
+ * one requested, mark the second half (the buddy) as free.
+ */
+ while (order_iter > order) {
+ --order_iter;
+ seg <<= 1;
+ bitmap_set(buddy->bitmap[order_iter], seg ^ 1, 1);
+ ++buddy->num_free[order_iter];
+ }
+
+ seg <<= order;
+ *segment = seg;
+
+ return 0;
+}
+
+void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int seg, unsigned int order)
+{
+ seg >>= order;
+
+ /* While the buddy of the freed segment is also free,
+ * merge the two and continue one order up.
+ */
+ while (test_bit(seg ^ 1, buddy->bitmap[order])) {
+ bitmap_clear(buddy->bitmap[order], seg ^ 1, 1);
+ --buddy->num_free[order];
+ seg >>= 1;
+ ++order;
+ }
+ bitmap_set(buddy->bitmap[order], seg, 1);
+
+ ++buddy->num_free[order];
+}
+
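
A worked trace of the allocator above (editor's illustration) for a tiny buddy with max_order = 2, i.e. one top-level segment covering four order-0 entries; {n} lists the bits that are set, meaning the free segments at that order, and num_free is indexed [order 0, order 1, order 2]:

  initial:            order-2: {0}   order-1: {}    order-0: {}    num_free = [0, 0, 1]

  mlx5dr_buddy_alloc_mem(order = 0):
    take order-2 segment 0 and split on the way down:
                      order-2: {}    order-1: {1}   order-0: {1}   num_free = [1, 1, 0]
    -> *segment = 0 (in order-0 units)

  mlx5dr_buddy_free_mem(seg = 0, order = 0):
    buddy bit 1 is free at order 0 -> merge; buddy bit 1 is free at order 1 -> merge;
    order-2 bit 0 is set again -> back to the initial state
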
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
new file mode 100644
index 000000000..d7b1a230b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -0,0 +1,824 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
+ bool other_vport,
+ u16 vport_number,
+ u64 *icm_address_rx,
+ u64 *icm_address_tx)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+ int err;
+
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport);
+ MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number);
+
+ err = mlx5_cmd_exec_inout(mdev, query_esw_vport_context, in, out);
+ if (err)
+ return err;
+
+ *icm_address_rx =
+ MLX5_GET64(query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_rx);
+ *icm_address_tx =
+ MLX5_GET64(query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_tx);
+ return 0;
+}
+
+int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport,
+ u16 vport_number, u16 *gvmi)
+{
+ u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+ int out_size;
+ void *out;
+ int err;
+
+ out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ out = kzalloc(out_size, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, other_function, other_vport);
+ MLX5_SET(query_hca_cap_in, in, function_id, vport_number);
+ MLX5_SET(query_hca_cap_in, in, op_mod,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
+ HCA_CAP_OPMOD_GET_CUR);
+
+ err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+ if (err) {
+ kfree(out);
+ return err;
+ }
+
+ *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id);
+
+ kfree(out);
+ return 0;
+}
+
+int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
+ struct mlx5dr_esw_caps *caps)
+{
+ caps->drop_icm_address_rx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_fdb_action_drop_icm_address_rx);
+ caps->drop_icm_address_tx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_fdb_action_drop_icm_address_tx);
+ caps->uplink_icm_address_rx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_rx);
+ caps->uplink_icm_address_tx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_tx);
+ caps->sw_owner_v2 = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner_v2);
+ if (!caps->sw_owner_v2)
+ caps->sw_owner = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner);
+
+ return 0;
+}
+
+static int dr_cmd_query_nic_vport_roce_en(struct mlx5_core_dev *mdev,
+ u16 vport, bool *roce_en)
+{
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
+ int err;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, !!vport);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *roce_en = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.roce_en);
+ return 0;
+}
+
+int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+ struct mlx5dr_cmd_caps *caps)
+{
+ bool roce_en;
+ int err;
+
+ caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required);
+ caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager);
+ caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id);
+ caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols);
+ caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version);
+ caps->roce_caps.fl_rc_qp_when_roce_disabled =
+ MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled);
+
+ if (MLX5_CAP_GEN(mdev, roce)) {
+ err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
+ if (err)
+ return err;
+
+ caps->roce_caps.roce_en = roce_en;
+ caps->roce_caps.fl_rc_qp_when_roce_disabled |=
+ MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
+ caps->roce_caps.fl_rc_qp_when_roce_enabled =
+ MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
+ }
+
+ caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new);
+
+ /* geneve_tlv_option_0_exist is the indication of
+ * STE support for lookup type flex_parser_ok
+ */
+ caps->flex_parser_ok_bits_supp =
+ MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) {
+ caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
+ caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1);
+ }
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED) {
+ caps->flex_parser_id_icmpv6_dw0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0);
+ caps->flex_parser_id_icmpv6_dw1 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1);
+ }
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED)
+ caps->flex_parser_id_geneve_tlv_option_0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED)
+ caps->flex_parser_id_mpls_over_gre =
+ MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED)
+ caps->flex_parser_id_mpls_over_udp =
+ MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED)
+ caps->flex_parser_id_gtpu_dw_0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_0);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED)
+ caps->flex_parser_id_gtpu_teid =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_teid);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED)
+ caps->flex_parser_id_gtpu_dw_2 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_2);
+
+ if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED)
+ caps->flex_parser_id_gtpu_first_ext_dw_0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_first_ext_dw_0);
+
+ caps->nic_rx_drop_address =
+ MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address);
+ caps->nic_tx_drop_address =
+ MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address);
+ caps->nic_tx_allow_address =
+ MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address);
+
+ caps->rx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner_v2);
+ caps->tx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner_v2);
+
+ if (!caps->rx_sw_owner_v2)
+ caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner);
+ if (!caps->tx_sw_owner_v2)
+ caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner);
+
+ caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level);
+
+ caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size);
+ caps->hdr_modify_icm_addr =
+ MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address);
+
+ caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port);
+
+ caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev);
+
+ return 0;
+}
+
+int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
+ enum fs_flow_table_type type,
+ u32 table_id,
+ struct mlx5dr_cmd_query_flow_table_details *output)
+{
+ u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {};
+ int err;
+
+ MLX5_SET(query_flow_table_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_TABLE);
+
+ MLX5_SET(query_flow_table_in, in, table_type, type);
+ MLX5_SET(query_flow_table_in, in, table_id, table_id);
+
+ err = mlx5_cmd_exec_inout(dev, query_flow_table, in, out);
+ if (err)
+ return err;
+
+ output->status = MLX5_GET(query_flow_table_out, out, status);
+ output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level);
+
+ output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out,
+ flow_table_context.sw_owner_icm_root_1);
+ output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out,
+ flow_table_context.sw_owner_icm_root_0);
+
+ return 0;
+}
+
+int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev,
+ u32 sampler_id,
+ u64 *rx_icm_addr,
+ u64 *tx_icm_addr)
+{
+ u32 out[MLX5_ST_SZ_DW(query_sampler_obj_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ void *attr;
+ int ret;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ attr = MLX5_ADDR_OF(query_sampler_obj_out, out, sampler_object);
+
+ *rx_icm_addr = MLX5_GET64(sampler_obj, attr,
+ sw_steering_icm_address_rx);
+ *tx_icm_addr = MLX5_GET64(sampler_obj, attr,
+ sw_steering_icm_address_tx);
+
+ return 0;
+}
+
+int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev)
+{
+ u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {};
+
+ MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING);
+
+ return mlx5_cmd_exec_in(mdev, sync_steering, in);
+}
+
+int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id,
+ u32 modify_header_id,
+ u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {};
+ void *in_flow_context;
+ unsigned int inlen;
+ void *in_dests;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
+ 1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+ MLX5_SET(set_fte_in, in, table_type, table_type);
+ MLX5_SET(set_fte_in, in, table_id, table_id);
+
+ in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+ MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+ MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id);
+ MLX5_SET(flow_context, in_flow_context, destination_list_size, 1);
+ MLX5_SET(flow_context, in_flow_context, action,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR);
+
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, vport);
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
+
+ MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ MLX5_SET(delete_fte_in, in, table_type, table_type);
+ MLX5_SET(delete_fte_in, in, table_id, table_id);
+
+ return mlx5_cmd_exec_in(mdev, delete_fte, in);
+}
+
+int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u8 num_of_actions,
+ u64 *actions,
+ u32 *modify_header_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {};
+ void *p_actions;
+ u32 inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) +
+ num_of_actions * sizeof(u64);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(alloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type);
+ MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions);
+ p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
+ memcpy(p_actions, actions, num_of_actions * sizeof(u64));
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+ goto out;
+
+ *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out,
+ modify_header_id);
+out:
+ kvfree(in);
+ return err;
+}
+
+int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 modify_header_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
+
+ MLX5_SET(dealloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
+ modify_header_id);
+
+ return mlx5_cmd_exec_in(mdev, dealloc_modify_header_context, in);
+}
+
+int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 *group_id)
+{
+ u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP);
+ MLX5_SET(create_flow_group_in, in, table_type, table_type);
+ MLX5_SET(create_flow_group_in, in, table_id, table_id);
+
+ err = mlx5_cmd_exec_inout(mdev, create_flow_group, in, out);
+ if (err)
+ goto out;
+
+ *group_id = MLX5_GET(create_flow_group_out, out, group_id);
+
+out:
+ kvfree(in);
+ return err;
+}
+
+int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
+
+ MLX5_SET(destroy_flow_group_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ MLX5_SET(destroy_flow_group_in, in, table_type, table_type);
+ MLX5_SET(destroy_flow_group_in, in, table_id, table_id);
+ MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+
+ return mlx5_cmd_exec_in(mdev, destroy_flow_group, in);
+}
+
+int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
+ struct mlx5dr_cmd_create_flow_table_attr *attr,
+ u64 *fdb_rx_icm_addr,
+ u32 *table_id)
+{
+ u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
+ void *ft_mdev;
+ int err;
+
+ MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
+ MLX5_SET(create_flow_table_in, in, table_type, attr->table_type);
+ MLX5_SET(create_flow_table_in, in, uid, attr->uid);
+
+ ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
+ MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl);
+ MLX5_SET(flow_table_context, ft_mdev, sw_owner, attr->sw_owner);
+ MLX5_SET(flow_table_context, ft_mdev, level, attr->level);
+
+ if (attr->sw_owner) {
+ /* icm_addr_0 is used for FDB RX / NIC TX / NIC RX,
+ * icm_addr_1 is used for FDB TX.
+ */
+ if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) {
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_0, attr->icm_addr_rx);
+ } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) {
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_0, attr->icm_addr_tx);
+ } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) {
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_0, attr->icm_addr_rx);
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_1, attr->icm_addr_tx);
+ }
+ }
+
+ MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
+ attr->decap_en);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
+ attr->reformat_en);
+
+ err = mlx5_cmd_exec_inout(mdev, create_flow_table, in, out);
+ if (err)
+ return err;
+
+ *table_id = MLX5_GET(create_flow_table_out, out, table_id);
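+ /* For FW-owned FDB tables, reassemble the 64-bit RX ICM root address
+  * from the three address fields returned in the command output.
+  */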
+ if (!attr->sw_owner && attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB &&
+ fdb_rx_icm_addr)
+ *fdb_rx_icm_addr =
+ (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) |
+ (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 |
+ (u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40;
+
+ return 0;
+}
+
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+ u32 table_id,
+ u32 table_type)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
+
+ MLX5_SET(destroy_flow_table_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ MLX5_SET(destroy_flow_table_in, in, table_type, table_type);
+ MLX5_SET(destroy_flow_table_in, in, table_id, table_id);
+
+ return mlx5_cmd_exec_in(mdev, destroy_flow_table, in);
+}
+
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+ enum mlx5_reformat_ctx_type rt,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
+ size_t reformat_size,
+ void *reformat_data,
+ u32 *reformat_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {};
+ size_t inlen, cmd_data_sz, cmd_total_sz;
+ void *prctx;
+ void *pdata;
+ void *in;
+ int err;
+
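+ /* The input length is the base command size minus the flexible
+  * reformat_data placeholder, plus the caller's reformat data,
+  * rounded up to a 4-byte multiple.
+  */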
+ cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in);
+ cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in,
+ packet_reformat_context.reformat_data);
+ inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+
+ prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context);
+ pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data);
+
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt);
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_param_0, reformat_param_0);
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_param_1, reformat_param_1);
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size);
+ if (reformat_data && reformat_size)
+ memcpy(pdata, reformat_data, reformat_size);
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+ goto err_free_in;
+
+ *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
+
+err_free_in:
+ kvfree(in);
+ return err;
+}
+
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+ u32 reformat_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
+
+ MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+ reformat_id);
+
+ mlx5_cmd_exec_in(mdev, dealloc_packet_reformat_context, in);
+}
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+ u16 index, struct mlx5dr_cmd_gid_attr *attr)
+{
+ u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {};
+ int err;
+
+ MLX5_SET(query_roce_address_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ROCE_ADDRESS);
+
+ MLX5_SET(query_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num);
+
+ err = mlx5_cmd_exec_inout(mdev, query_roce_address, in, out);
+ if (err)
+ return err;
+
+ memcpy(&attr->gid,
+ MLX5_ADDR_OF(query_roce_address_out,
+ out, roce_address.source_l3_address),
+ sizeof(attr->gid));
+ memcpy(attr->mac,
+ MLX5_ADDR_OF(query_roce_address_out, out,
+ roce_address.source_mac_47_32),
+ sizeof(attr->mac));
+
+ if (MLX5_GET(query_roce_address_out, out,
+ roce_address.roce_version) == MLX5_ROCE_VERSION_2)
+ attr->roce_ver = MLX5_ROCE_VERSION_2;
+ else
+ attr->roce_ver = MLX5_ROCE_VERSION_1;
+
+ return 0;
+}
+
+static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev,
+ struct mlx5dr_cmd_fte_info *fte,
+ bool *extended_dest)
+{
+ int fw_log_max_fdb_encap_uplink = MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+ int num_fwd_destinations = 0;
+ int num_encap = 0;
+ int i;
+
+ *extended_dest = false;
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return 0;
+ for (i = 0; i < fte->dests_size; i++) {
+ if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ||
+ fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_NONE)
+ continue;
+ if ((fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
+ fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ num_encap++;
+ num_fwd_destinations++;
+ }
+
+ if (num_fwd_destinations > 1 && num_encap > 0)
+ *extended_dest = true;
+
+ if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+ mlx5_core_warn(dev, "FW does not support extended destination");
+ return -EOPNOTSUPP;
+ }
+ if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+ mlx5_core_warn(dev, "FW does not support more than %d encaps",
+ 1 << fw_log_max_fdb_encap_uplink);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
+ int opmod, int modify_mask,
+ struct mlx5dr_cmd_ft_info *ft,
+ u32 group_id,
+ struct mlx5dr_cmd_fte_info *fte)
+{
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {};
+ void *in_flow_context, *vlan;
+ bool extended_dest = false;
+ void *in_match_value;
+ unsigned int inlen;
+ int dst_cnt_size;
+ void *in_dests;
+ u32 *in;
+ int err;
+ int i;
+
+ if (mlx5dr_cmd_set_extended_dest(dev, fte, &extended_dest))
+ return -EOPNOTSUPP;
+
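+ /* The per-destination entry size depends on whether the extended
+  * destination format (which carries a per-destination reformat ID)
+  * is used.
+  */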
+ if (!extended_dest)
+ dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+ else
+ dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+ MLX5_SET(set_fte_in, in, op_mod, opmod);
+ MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask);
+ MLX5_SET(set_fte_in, in, table_type, ft->type);
+ MLX5_SET(set_fte_in, in, table_id, ft->id);
+ MLX5_SET(set_fte_in, in, flow_index, fte->index);
+ MLX5_SET(set_fte_in, in, ignore_flow_level, fte->ignore_flow_level);
+ if (ft->vport) {
+ MLX5_SET(set_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(set_fte_in, in, other_vport, 1);
+ }
+
+ in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+ MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+
+ MLX5_SET(flow_context, in_flow_context, flow_tag,
+ fte->flow_context.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_source,
+ fte->flow_context.flow_source);
+
+ MLX5_SET(flow_context, in_flow_context, extended_destination,
+ extended_dest);
+ if (extended_dest) {
+ u32 action;
+
+ action = fte->action.action &
+ ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ MLX5_SET(flow_context, in_flow_context, action, action);
+ } else {
+ MLX5_SET(flow_context, in_flow_context, action,
+ fte->action.action);
+ if (fte->action.pkt_reformat)
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.pkt_reformat->id);
+ }
+ if (fte->action.modify_hdr)
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_hdr->id);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
+
+ in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
+ match_value);
+ memcpy(in_match_value, fte->val, sizeof(u32) * MLX5_ST_SZ_DW_MATCH_PARAM);
+
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ int list_size = 0;
+
+ for (i = 0; i < fte->dests_size; i++) {
+ enum mlx5_flow_destination_type type = fte->dest_arr[i].type;
+ enum mlx5_ifc_flow_destination_type ifc_type;
+ unsigned int id;
+
+ if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_NONE:
+ continue;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ id = fte->dest_arr[i].ft_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ id = fte->dest_arr[i].ft_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ if (type == MLX5_FLOW_DESTINATION_TYPE_VPORT) {
+ id = fte->dest_arr[i].vport.num;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid,
+ !!(fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT;
+ } else {
+ id = 0;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid, 1);
+ }
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id,
+ fte->dest_arr[i].vport.vhca_id);
+ if (extended_dest && (fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) {
+ MLX5_SET(dest_format_struct, in_dests,
+ packet_reformat,
+ !!(fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+ MLX5_SET(extended_dest_format, in_dests,
+ packet_reformat_id,
+ fte->dest_arr[i].vport.reformat_id);
+ }
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ id = fte->dest_arr[i].sampler_id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+ break;
+ default:
+ id = fte->dest_arr[i].tir_num;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
+ }
+
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ ifc_type);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+ in_dests += dst_cnt_size;
+ list_size++;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, destination_list_size,
+ list_size);
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
+ log_max_flow_counter,
+ ft->type));
+ int list_size = 0;
+
+ for (i = 0; i < fte->dests_size; i++) {
+ if (fte->dest_arr[i].type !=
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
+ fte->dest_arr[i].counter_id);
+ in_dests += dst_cnt_size;
+ list_size++;
+ }
+ if (list_size > max_list_size) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, flow_counter_list_size,
+ list_size);
+ }
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+err_out:
+ kvfree(in);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
new file mode 100644
index 000000000..7adcf0eec
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include "dr_types.h"
+
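+/* Derive a stable dump identifier from a kernel pointer by keeping
+ * its lower 32 bits.
+ */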
+#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL)
+
+enum dr_dump_rec_type {
+ DR_DUMP_REC_TYPE_DOMAIN = 3000,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER = 3001,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_DEV_ATTR = 3002,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT = 3003,
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS = 3004,
+ DR_DUMP_REC_TYPE_DOMAIN_SEND_RING = 3005,
+
+ DR_DUMP_REC_TYPE_TABLE = 3100,
+ DR_DUMP_REC_TYPE_TABLE_RX = 3101,
+ DR_DUMP_REC_TYPE_TABLE_TX = 3102,
+
+ DR_DUMP_REC_TYPE_MATCHER = 3200,
+ DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201,
+ DR_DUMP_REC_TYPE_MATCHER_RX = 3202,
+ DR_DUMP_REC_TYPE_MATCHER_TX = 3203,
+ DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204,
+ DR_DUMP_REC_TYPE_MATCHER_MASK = 3205,
+
+ DR_DUMP_REC_TYPE_RULE = 3300,
+ DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301,
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0 = 3302,
+ DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 = 3303,
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1 = 3304,
+
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L2 = 3400,
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L3 = 3401,
+ DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR = 3402,
+ DR_DUMP_REC_TYPE_ACTION_DROP = 3403,
+ DR_DUMP_REC_TYPE_ACTION_QP = 3404,
+ DR_DUMP_REC_TYPE_ACTION_FT = 3405,
+ DR_DUMP_REC_TYPE_ACTION_CTR = 3406,
+ DR_DUMP_REC_TYPE_ACTION_TAG = 3407,
+ DR_DUMP_REC_TYPE_ACTION_VPORT = 3408,
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L2 = 3409,
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L3 = 3410,
+ DR_DUMP_REC_TYPE_ACTION_DEVX_TIR = 3411,
+ DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN = 3412,
+ DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413,
+ DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415,
+ DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420,
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421
+};
+
+void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl)
+{
+ mutex_lock(&tbl->dmn->dump_info.dbg_mutex);
+ list_add_tail(&tbl->dbg_node, &tbl->dmn->dbg_tbl_list);
+ mutex_unlock(&tbl->dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl)
+{
+ mutex_lock(&tbl->dmn->dump_info.dbg_mutex);
+ list_del(&tbl->dbg_node);
+ mutex_unlock(&tbl->dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ list_add_tail(&rule->dbg_node, &rule->matcher->dbg_rule_list);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ list_del(&rule->dbg_node);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+}
+
+static u64 dr_dump_icm_to_idx(u64 icm_addr)
+{
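+ /* Convert an ICM address to a compact dump index: drop the low
+  * 6 bits and keep the lower 32 bits of the result.
+  */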
+ return (icm_addr >> 6) & 0xffffffff;
+}
+
+#define DR_HEX_SIZE 256
+
+static void
+dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
+{
+ if (WARN_ON_ONCE(DR_HEX_SIZE < 2 * size + 1))
+ size = DR_HEX_SIZE / 2 - 1; /* truncate */
+
+ bin2hex(hex, src, size);
+ hex[2 * size] = 0; /* NULL-terminate */
+}
+
+static int
+dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+ struct mlx5dr_rule_action_member *action_mem)
+{
+ struct mlx5dr_action *action = action_mem->action;
+ const u64 action_id = DR_DBG_PTR_TO_ID(action);
+
+ switch (action->action_type) {
+ case DR_ACTION_TYP_DROP:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id);
+ break;
+ case DR_ACTION_TYP_FT:
+ if (action->dest_tbl->is_fw_tbl)
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->fw_tbl.id,
+ -1);
+ else
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_FT, action_id,
+ rule_id, action->dest_tbl->tbl->table_id,
+ DR_DBG_PTR_TO_ID(action->dest_tbl->tbl));
+
+ break;
+ case DR_ACTION_TYP_CTR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id,
+ action->ctr->ctr_id + action->ctr->offset);
+ break;
+ case DR_ACTION_TYP_TAG:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id,
+ action->flow_tag->flow_tag);
+ break;
+ case DR_ACTION_TYP_MODIFY_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id,
+ rule_id, action->rewrite->index);
+ break;
+ case DR_ACTION_TYP_VPORT:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id,
+ action->vport->caps->num);
+ break;
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id,
+ rule_id);
+ break;
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id,
+ rule_id, action->rewrite->index);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id,
+ rule_id, action->reformat->id);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id,
+ rule_id, action->reformat->id);
+ break;
+ case DR_ACTION_TYP_POP_VLAN:
+ seq_printf(file, "%d,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id,
+ rule_id);
+ break;
+ case DR_ACTION_TYP_PUSH_VLAN:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id,
+ rule_id, action->push_vlan->vlan_hdr);
+ break;
+ case DR_ACTION_TYP_INSERT_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ break;
+ case DR_ACTION_TYP_REMOVE_HDR:
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id,
+ rule_id, action->reformat->id,
+ action->reformat->param_0,
+ action->reformat->param_1);
+ break;
+ case DR_ACTION_TYP_SAMPLER:
+ seq_printf(file,
+ "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id,
+ 0, 0, action->sampler->sampler_id,
+ action->sampler->rx_icm_addr,
+ action->sampler->tx_icm_addr);
+ break;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+ bool is_rx, const u64 rule_id, u8 format_ver)
+{
+ char hw_ste_dump[DR_HEX_SIZE];
+ u32 mem_rec_type;
+
+ if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) {
+ mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 :
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0;
+ } else {
+ mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 :
+ DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1;
+ }
+
+ dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste),
+ DR_STE_SIZE_REDUCED);
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type,
+ dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id,
+ hw_ste_dump);
+
+ return 0;
+}
+
+static int
+dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+ bool is_rx, const u64 rule_id, u8 format_ver)
+{
+ struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
+ struct mlx5dr_ste *curr_ste = rule_rx_tx->last_rule_ste;
+ int ret, i;
+
+ if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i))
+ return 0;
+
+ while (i--) {
+ ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+ format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_rule_action_member *action_mem;
+ const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
+ struct mlx5dr_rule_rx_tx *rx = &rule->rx;
+ struct mlx5dr_rule_rx_tx *tx = &rule->tx;
+ u8 format_ver;
+ int ret;
+
+ format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver;
+
+ seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id,
+ DR_DBG_PTR_TO_ID(rule->matcher));
+
+ if (rx->nic_matcher) {
+ ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_matcher) {
+ ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+ if (ret < 0)
+ return ret;
+ }
+
+ list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
+ ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+ u8 criteria, const u64 matcher_id)
+{
+ char dump[DR_HEX_SIZE];
+
+ seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK,
+ matcher_id);
+
+ if (criteria & DR_MATCHER_CRITERIA_OUTER) {
+ dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_INNER) {
+ dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC) {
+ dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC2) {
+ dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2));
+ seq_printf(file, "%s,", dump);
+ } else {
+ seq_puts(file, ",");
+ }
+
+ if (criteria & DR_MATCHER_CRITERIA_MISC3) {
+ dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3));
+ seq_printf(file, "%s\n", dump);
+ } else {
+ seq_puts(file, ",\n");
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+ u32 index, bool is_rx, const u64 matcher_id)
+{
+ seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n",
+ DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx,
+ builder->lu_type);
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+ struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
+ const u64 matcher_id)
+{
+ enum dr_dump_rec_type rec_type;
+ u64 s_icm_addr, e_icm_addr;
+ int i, ret;
+
+ rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX :
+ DR_DUMP_REC_TYPE_MATCHER_TX;
+
+ s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk);
+ e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk);
+ seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n",
+ rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx),
+ matcher_id, matcher_rx_tx->num_of_builders,
+ dr_dump_icm_to_idx(s_icm_addr),
+ dr_dump_icm_to_idx(e_icm_addr));
+
+ for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
+ ret = dr_dump_matcher_builder(file,
+ &matcher_rx_tx->ste_builder[i],
+ i, is_rx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
+ struct mlx5dr_matcher_rx_tx *tx = &matcher->tx;
+ u64 matcher_id;
+ int ret;
+
+ matcher_id = DR_DBG_PTR_TO_ID(matcher);
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER,
+ matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio);
+
+ ret = dr_dump_matcher_mask(file, &matcher->mask,
+ matcher->match_criteria, matcher_id);
+ if (ret < 0)
+ return ret;
+
+ if (rx->nic_tbl) {
+ ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_tbl) {
+ ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_rule *rule;
+ int ret;
+
+ ret = dr_dump_matcher(file, matcher);
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(rule, &matcher->dbg_rule_list, dbg_node) {
+ ret = dr_dump_rule(file, rule);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+ struct mlx5dr_table_rx_tx *table_rx_tx,
+ const u64 table_id)
+{
+ enum dr_dump_rec_type rec_type;
+ u64 s_icm_addr;
+
+ rec_type = is_rx ? DR_DUMP_REC_TYPE_TABLE_RX :
+ DR_DUMP_REC_TYPE_TABLE_TX;
+
+ s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk);
+ seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id,
+ dr_dump_icm_to_idx(s_icm_addr));
+
+ return 0;
+}
+
+static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+{
+ struct mlx5dr_table_rx_tx *rx = &table->rx;
+ struct mlx5dr_table_rx_tx *tx = &table->tx;
+ int ret;
+
+ seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE,
+ DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn),
+ table->table_type, table->level);
+
+ if (rx->nic_dmn) {
+ ret = dr_dump_table_rx_tx(file, true, rx,
+ DR_DBG_PTR_TO_ID(table));
+ if (ret < 0)
+ return ret;
+ }
+
+ if (tx->nic_dmn) {
+ ret = dr_dump_table_rx_tx(file, false, tx,
+ DR_DBG_PTR_TO_ID(table));
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
+{
+ struct mlx5dr_matcher *matcher;
+ int ret;
+
+ ret = dr_dump_table(file, tbl);
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(matcher, &tbl->matcher_list, list_node) {
+ ret = dr_dump_matcher_all(file, matcher);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+ const u64 domain_id)
+{
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring),
+ domain_id, ring->cq->mcq.cqn, ring->qp->qpn);
+ return 0;
+}
+
+static int
+dr_dump_domain_info_flex_parser(struct seq_file *file,
+ const char *flex_parser_name,
+ const u8 flex_parser_value,
+ const u64 domain_id)
+{
+ seq_printf(file, "%d,0x%llx,%s,0x%x\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id,
+ flex_parser_name, flex_parser_value);
+ return 0;
+}
+
+static int
+dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+ const u64 domain_id)
+{
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+ unsigned long i, vports_num;
+
+ xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps)
+ ; /* count the number of vports in xarray */
+
+ seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi,
+ caps->nic_rx_drop_address, caps->nic_tx_drop_address,
+ caps->flex_protocols, vports_num, caps->eswitch_manager);
+
+ xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) {
+ vport_caps = xa_load(&caps->vports.vports_caps_xa, i);
+
+ seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n",
+ DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i,
+ vport_caps->vport_gvmi, vport_caps->icm_address_rx,
+ vport_caps->icm_address_tx);
+ }
+ return 0;
+}
+
+static int
+dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+ const u64 domain_id)
+{
+ int ret;
+
+ ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+ info->caps.flex_parser_id_icmp_dw0,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+ info->caps.flex_parser_id_icmp_dw1,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+ info->caps.flex_parser_id_icmpv6_dw0,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+ info->caps.flex_parser_id_icmpv6_dw1,
+ domain_id);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int
+dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
+{
+ u64 domain_id = DR_DBG_PTR_TO_ID(dmn);
+ int ret;
+
+ seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN,
+ domain_id, dmn->type, dmn->info.caps.gvmi,
+ dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev));
+
+ ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+ if (ret < 0)
+ return ret;
+
+ if (dmn->info.supp_sw_steering) {
+ ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_table *tbl;
+ int ret;
+
+ mutex_lock(&dmn->dump_info.dbg_mutex);
+ mlx5dr_domain_lock(dmn);
+
+ ret = dr_dump_domain(file, dmn);
+ if (ret < 0)
+ goto unlock_mutex;
+
+ list_for_each_entry(tbl, &dmn->dbg_tbl_list, dbg_node) {
+ ret = dr_dump_table_all(file, tbl);
+ if (ret < 0)
+ break;
+ }
+
+unlock_mutex:
+ mlx5dr_domain_unlock(dmn);
+ mutex_unlock(&dmn->dump_info.dbg_mutex);
+ return ret;
+}
+
+static int dr_dump_show(struct seq_file *file, void *priv)
+{
+ return dr_dump_domain_all(file, file->private);
+}
+DEFINE_SHOW_ATTRIBUTE(dr_dump);
+
+void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn)
+{
+ struct mlx5_core_dev *dev = dmn->mdev;
+ char file_name[128];
+
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+ mlx5_core_warn(dev,
+ "Steering dump is not supported for NIC RX/TX domains\n");
+ return;
+ }
+
+ dmn->dump_info.steering_debugfs =
+ debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev));
+ dmn->dump_info.fdb_debugfs =
+ debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs);
+
+ sprintf(file_name, "dmn_%p", dmn);
+ debugfs_create_file(file_name, 0444, dmn->dump_info.fdb_debugfs,
+ dmn, &dr_dump_fops);
+
+ INIT_LIST_HEAD(&dmn->dbg_tbl_list);
+ mutex_init(&dmn->dump_info.dbg_mutex);
+}
+
+void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn)
+{
+ debugfs_remove_recursive(dmn->dump_info.steering_debugfs);
+ mutex_destroy(&dmn->dump_info.dbg_mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
new file mode 100644
index 000000000..def6cf853
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+struct mlx5dr_dbg_dump_info {
+ struct mutex dbg_mutex; /* protect dbg lists */
+ struct dentry *steering_debugfs;
+ struct dentry *fdb_debugfs;
+};
+
+void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn);
+void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn);
+void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl);
+void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl);
+void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule);
+void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
new file mode 100644
index 000000000..fc6ae49b5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/mlx5/eswitch.h>
+#include <linux/err.h>
+#include "dr_types.h"
+
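+/* A domain type supports SW steering if the device exposes sw_owner,
+ * or sw_owner_v2 with a steering format version up to ConnectX-7.
+ */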
+#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \
+ ((dmn)->info.caps.dmn_type##_sw_owner || \
+ ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \
+ (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7))
+
+static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn)
+{
+ /* Per-vport cached FW flow tables for checksum recalculation;
+ * the recalculation is needed due to a HW bug in STEv0.
+ */
+ xa_init(&dmn->csum_fts_xa);
+}
+
+static void dr_domain_uninit_csum_recalc_fts(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+ unsigned long i;
+
+ xa_for_each(&dmn->csum_fts_xa, i, recalc_cs_ft) {
+ if (recalc_cs_ft)
+ mlx5dr_fw_destroy_recalc_cs_ft(dmn, recalc_cs_ft);
+ }
+
+ xa_destroy(&dmn->csum_fts_xa);
+}
+
+int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+ u16 vport_num,
+ u64 *rx_icm_addr)
+{
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+ int ret;
+
+ recalc_cs_ft = xa_load(&dmn->csum_fts_xa, vport_num);
+ if (!recalc_cs_ft) {
+ /* Table hasn't been created yet */
+ recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num);
+ if (!recalc_cs_ft)
+ return -EINVAL;
+
+ ret = xa_err(xa_store(&dmn->csum_fts_xa, vport_num,
+ recalc_cs_ft, GFP_KERNEL));
+ if (ret)
+ return ret;
+ }
+
+ *rx_icm_addr = recalc_cs_ft->rx_icm_addr;
+
+ return 0;
+}
+
+static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
+{
+ int ret;
+
+ dmn->ste_ctx = mlx5dr_ste_get_ctx(dmn->info.caps.sw_format_ver);
+ if (!dmn->ste_ctx) {
+ mlx5dr_err(dmn, "SW Steering on this device is unsupported\n");
+ return -EOPNOTSUPP;
+ }
+
+ ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn);
+ if (ret) {
+ mlx5dr_err(dmn, "Couldn't allocate PD, ret: %d", ret);
+ return ret;
+ }
+
+ dmn->uar = mlx5_get_uars_page(dmn->mdev);
+ if (IS_ERR(dmn->uar)) {
+ mlx5dr_err(dmn, "Couldn't allocate UAR\n");
+ ret = PTR_ERR(dmn->uar);
+ goto clean_pd;
+ }
+
+ dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE);
+ if (!dmn->ste_icm_pool) {
+ mlx5dr_err(dmn, "Couldn't get icm memory\n");
+ ret = -ENOMEM;
+ goto clean_uar;
+ }
+
+ dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION);
+ if (!dmn->action_icm_pool) {
+ mlx5dr_err(dmn, "Couldn't get action icm memory\n");
+ ret = -ENOMEM;
+ goto free_ste_icm_pool;
+ }
+
+ ret = mlx5dr_send_ring_alloc(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Couldn't create send-ring\n");
+ goto free_action_icm_pool;
+ }
+
+ return 0;
+
+free_action_icm_pool:
+ mlx5dr_icm_pool_destroy(dmn->action_icm_pool);
+free_ste_icm_pool:
+ mlx5dr_icm_pool_destroy(dmn->ste_icm_pool);
+clean_uar:
+ mlx5_put_uars_page(dmn->mdev, dmn->uar);
+clean_pd:
+ mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
+
+ return ret;
+}
+
+static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn)
+{
+ mlx5dr_send_ring_free(dmn, dmn->send_ring);
+ mlx5dr_icm_pool_destroy(dmn->action_icm_pool);
+ mlx5dr_icm_pool_destroy(dmn->ste_icm_pool);
+ mlx5_put_uars_page(dmn->mdev, dmn->uar);
+ mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
+}
+
+static void dr_domain_fill_uplink_caps(struct mlx5dr_domain *dmn,
+ struct mlx5dr_cmd_vport_cap *uplink_vport)
+{
+ struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps;
+
+ uplink_vport->num = MLX5_VPORT_UPLINK;
+ uplink_vport->icm_address_rx = esw_caps->uplink_icm_address_rx;
+ uplink_vport->icm_address_tx = esw_caps->uplink_icm_address_tx;
+ uplink_vport->vport_gvmi = 0;
+ uplink_vport->vhca_gvmi = dmn->info.caps.gvmi;
+}
+
+static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
+ u16 vport_number,
+ bool other_vport,
+ struct mlx5dr_cmd_vport_cap *vport_caps)
+{
+ int ret;
+
+ ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev,
+ other_vport,
+ vport_number,
+ &vport_caps->icm_address_rx,
+ &vport_caps->icm_address_tx);
+ if (ret)
+ return ret;
+
+ ret = mlx5dr_cmd_query_gvmi(dmn->mdev,
+ other_vport,
+ vport_number,
+ &vport_caps->vport_gvmi);
+ if (ret)
+ return ret;
+
+ vport_caps->num = vport_number;
+ vport_caps->vhca_gvmi = dmn->info.caps.gvmi;
+
+ return 0;
+}
+
+static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn)
+{
+ return dr_domain_query_vport(dmn, 0, false,
+ &dmn->info.caps.vports.esw_manager_caps);
+}
+
+static void dr_domain_query_uplink(struct mlx5dr_domain *dmn)
+{
+ dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps);
+}
+
+static struct mlx5dr_cmd_vport_cap *
+dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+ int ret;
+
+ vport_caps = kvzalloc(sizeof(*vport_caps), GFP_KERNEL);
+ if (!vport_caps)
+ return NULL;
+
+ ret = dr_domain_query_vport(dmn, vport, true, vport_caps);
+ if (ret) {
+ kvfree(vport_caps);
+ return NULL;
+ }
+
+ ret = xa_insert(&caps->vports.vports_caps_xa, vport,
+ vport_caps, GFP_KERNEL);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Couldn't insert new vport into xarray (%d)\n", ret);
+ kvfree(vport_caps);
+ return ERR_PTR(ret);
+ }
+
+ return vport_caps;
+}
+
+static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+ return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) ||
+ (!caps->is_ecpf && vport == 0);
+}
+
+struct mlx5dr_cmd_vport_cap *
+mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+
+ if (dr_domain_is_esw_mgr_vport(dmn, vport))
+ return &caps->vports.esw_manager_caps;
+
+ if (vport == MLX5_VPORT_UPLINK)
+ return &caps->vports.uplink_caps;
+
+vport_load:
+ vport_caps = xa_load(&caps->vports.vports_caps_xa, vport);
+ if (vport_caps)
+ return vport_caps;
+
+ vport_caps = dr_domain_add_vport_cap(dmn, vport);
+ if (PTR_ERR(vport_caps) == -EBUSY)
+ /* caps were already stored by another thread */
+ goto vport_load;
+
+ return vport_caps;
+}
+
+static void dr_domain_clear_vports(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+ unsigned long i;
+
+ xa_for_each(&dmn->info.caps.vports.vports_caps_xa, i, vport_caps) {
+ vport_caps = xa_erase(&dmn->info.caps.vports.vports_caps_xa, i);
+ kvfree(vport_caps);
+ }
+}
+
+static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
+ struct mlx5dr_domain *dmn)
+{
+ int ret;
+
+ if (!dmn->info.caps.eswitch_manager)
+ return -EOPNOTSUPP;
+
+ ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps);
+ if (ret)
+ return ret;
+
+ dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner;
+ dmn->info.caps.fdb_sw_owner_v2 = dmn->info.caps.esw_caps.sw_owner_v2;
+ dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx;
+ dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx;
+
+ xa_init(&dmn->info.caps.vports.vports_caps_xa);
+
+ /* Query the eswitch manager and uplink vports only. The rest of the
+ * vports (vport 0, VFs and SFs) are queried dynamically.
+ */
+
+ ret = dr_domain_query_esw_mngr(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed to query eswitch manager vport caps (err: %d)", ret);
+ goto free_vports_caps_xa;
+ }
+
+ dr_domain_query_uplink(dmn);
+
+ return 0;
+
+free_vports_caps_xa:
+ xa_destroy(&dmn->info.caps.vports.vports_caps_xa);
+
+ return ret;
+}
+
+static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
+ struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ int ret;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) {
+ mlx5dr_err(dmn, "Failed to allocate domain, bad link type\n");
+ return -EOPNOTSUPP;
+ }
+
+ ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps);
+ if (ret)
+ return ret;
+
+ ret = dr_domain_query_fdb_caps(mdev, dmn);
+ if (ret)
+ return ret;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, rx))
+ return -ENOTSUPP;
+
+ dmn->info.supp_sw_steering = true;
+ dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
+ dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address;
+ dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address;
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, tx))
+ return -ENOTSUPP;
+
+ dmn->info.supp_sw_steering = true;
+ dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
+ dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address;
+ dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address;
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ if (!dmn->info.caps.eswitch_manager)
+ return -ENOTSUPP;
+
+ if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, fdb))
+ return -ENOTSUPP;
+
+ dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
+ dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
+ vport_cap = &dmn->info.caps.vports.esw_manager_caps;
+
+ dmn->info.supp_sw_steering = true;
+ dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx;
+ dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx;
+ dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address;
+ dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address;
+ break;
+ default:
+ mlx5dr_err(dmn, "Invalid domain\n");
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn)
+{
+ dr_domain_clear_vports(dmn);
+ xa_destroy(&dmn->info.caps.vports.vports_caps_xa);
+}
+
+struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
+{
+ struct mlx5dr_domain *dmn;
+ int ret;
+
+ if (type > MLX5DR_DOMAIN_TYPE_FDB)
+ return NULL;
+
+ dmn = kzalloc(sizeof(*dmn), GFP_KERNEL);
+ if (!dmn)
+ return NULL;
+
+ dmn->mdev = mdev;
+ dmn->type = type;
+ refcount_set(&dmn->refcount, 1);
+ mutex_init(&dmn->info.rx.mutex);
+ mutex_init(&dmn->info.tx.mutex);
+
+ if (dr_domain_caps_init(mdev, dmn)) {
+ mlx5dr_err(dmn, "Failed init domain, no caps\n");
+ goto free_domain;
+ }
+
+ dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K;
+ dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K,
+ dmn->info.caps.log_icm_size);
+
+ if (!dmn->info.supp_sw_steering) {
+ mlx5dr_err(dmn, "SW steering is not supported\n");
+ goto uninit_caps;
+ }
+
+ /* Allocate resources */
+ ret = dr_domain_init_resources(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed init domain resources\n");
+ goto uninit_caps;
+ }
+
+ dr_domain_init_csum_recalc_fts(dmn);
+ mlx5dr_dbg_init_dump(dmn);
+ return dmn;
+
+uninit_caps:
+ dr_domain_caps_uninit(dmn);
+free_domain:
+ kfree(dmn);
+ return NULL;
+}
+
+/* Ensure the device steering tables are synchronized with updates made
+ * by SW insertion.
+ */
+int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
+{
+ int ret = 0;
+
+ if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
+ mlx5dr_domain_lock(dmn);
+ ret = mlx5dr_send_ring_force_drain(dmn);
+ mlx5dr_domain_unlock(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n",
+ flags, ret);
+ return ret;
+ }
+ }
+
+ if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
+ ret = mlx5dr_cmd_sync_steering(dmn->mdev);
+
+ return ret;
+}
+
+int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
+{
+ if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1))
+ return -EBUSY;
+
+ /* make sure resources are not used by the hardware */
+ mlx5dr_cmd_sync_steering(dmn->mdev);
+ mlx5dr_dbg_uninit_dump(dmn);
+ dr_domain_uninit_csum_recalc_fts(dmn);
+ dr_domain_uninit_resources(dmn);
+ dr_domain_caps_uninit(dmn);
+ mutex_destroy(&dmn->info.tx.mutex);
+ mutex_destroy(&dmn->info.rx.mutex);
+ kfree(dmn);
+ return 0;
+}
+
+void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain *peer_dmn)
+{
+ mlx5dr_domain_lock(dmn);
+
+ if (dmn->peer_dmn)
+ refcount_dec(&dmn->peer_dmn->refcount);
+
+ dmn->peer_dmn = peer_dmn;
+
+ if (dmn->peer_dmn)
+ refcount_inc(&dmn->peer_dmn->refcount);
+
+ mlx5dr_domain_unlock(dmn);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
new file mode 100644
index 000000000..f05ef0cd5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/types.h>
+#include "dr_types.h"
+
+struct mlx5dr_fw_recalc_cs_ft *
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num)
+{
+ struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+ u32 table_id, group_id, modify_hdr_id;
+ u64 rx_icm_addr, modify_ttl_action;
+ int ret;
+
+ recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL);
+ if (!recalc_cs_ft)
+ return NULL;
+
+ ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB;
+ ft_attr.level = dmn->info.caps.max_ft_level - 1;
+ ft_attr.term_tbl = true;
+
+ ret = mlx5dr_cmd_create_flow_table(dmn->mdev,
+ &ft_attr,
+ &rx_icm_addr,
+ &table_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret);
+ goto free_ttl_tbl;
+ }
+
+ ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ table_id, &group_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret);
+ goto destroy_flow_table;
+ }
+
+ /* Modify TTL action by adding zero to trigger CS recalculation */
+ modify_ttl_action = 0;
+ MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD);
+ MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
+
+ ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1,
+ &modify_ttl_action,
+ &modify_hdr_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret);
+ goto destroy_flow_group;
+ }
+
+ ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ table_id, group_id, modify_hdr_id,
+ vport_num);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret);
+ goto dealloc_modify_header;
+ }
+
+ recalc_cs_ft->modify_hdr_id = modify_hdr_id;
+ recalc_cs_ft->rx_icm_addr = rx_icm_addr;
+ recalc_cs_ft->table_id = table_id;
+ recalc_cs_ft->group_id = group_id;
+
+ return recalc_cs_ft;
+
+dealloc_modify_header:
+ mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id);
+destroy_flow_group:
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ table_id, group_id);
+destroy_flow_table:
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB);
+free_ttl_tbl:
+ kfree(recalc_cs_ft);
+ return NULL;
+}
+
+void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft)
+{
+ mlx5dr_cmd_del_flow_table_entry(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ recalc_cs_ft->table_id);
+ mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id);
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ recalc_cs_ft->table_id,
+ recalc_cs_ft->group_id);
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev,
+ recalc_cs_ft->table_id,
+ MLX5_FLOW_TABLE_TYPE_FDB);
+
+ kfree(recalc_cs_ft);
+}
+
+int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_cmd_flow_destination_hw_info *dest,
+ int num_dest,
+ bool reformat_req,
+ u32 *tbl_id,
+ u32 *group_id,
+ bool ignore_flow_level,
+ u32 flow_source)
+{
+ struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
+ struct mlx5dr_cmd_fte_info fte_info = {};
+ u32 val[MLX5_ST_SZ_DW_MATCH_PARAM] = {};
+ struct mlx5dr_cmd_ft_info ft_info = {};
+ int ret;
+
+ ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB;
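+ /* Use a level near the bottom of the range (max_ft_level - 2),
+  * capped at MLX5_FT_MAX_MULTIPATH_LEVEL.
+  */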
+ ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2,
+ MLX5_FT_MAX_MULTIPATH_LEVEL);
+ ft_attr.reformat_en = reformat_req;
+ ft_attr.decap_en = reformat_req;
+
+ ret = mlx5dr_cmd_create_flow_table(dmn->mdev, &ft_attr, NULL, tbl_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating multi dest FW flow table %d\n", ret);
+ return ret;
+ }
+
+ ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ *tbl_id, group_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating multi dest FW flow group %d\n", ret);
+ goto free_flow_table;
+ }
+
+ ft_info.id = *tbl_id;
+ ft_info.type = FS_FT_FDB;
+ fte_info.action.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ fte_info.dests_size = num_dest;
+ fte_info.val = val;
+ fte_info.dest_arr = dest;
+ fte_info.ignore_flow_level = ignore_flow_level;
+ fte_info.flow_context.flow_source = flow_source;
+
+ ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed setting fte into table %d\n", ret);
+ goto free_flow_group;
+ }
+
+ return 0;
+
+free_flow_group:
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB,
+ *tbl_id, *group_id);
+free_flow_table:
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev, *tbl_id,
+ MLX5_FLOW_TABLE_TYPE_FDB);
+ return ret;
+}
+
+void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn,
+ u32 tbl_id, u32 group_id)
+{
+ mlx5dr_cmd_del_flow_table_entry(dmn->mdev, FS_FT_FDB, tbl_id);
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ tbl_id, group_id);
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev, tbl_id,
+ MLX5_FLOW_TABLE_TYPE_FDB);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
new file mode 100644
index 000000000..4ca67fa24
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
+
+struct mlx5dr_icm_pool {
+ enum mlx5dr_icm_type icm_type;
+ enum mlx5dr_icm_chunk_size max_log_chunk_sz;
+ struct mlx5dr_domain *dmn;
+ /* memory management */
+ struct mutex mutex; /* protect the ICM pool and ICM buddy */
+ struct list_head buddy_mem_list;
+ u64 hot_memory_size;
+};
+
+struct mlx5dr_icm_dm {
+ u32 obj_id;
+ enum mlx5_sw_icm_type type;
+ phys_addr_t addr;
+ size_t length;
+};
+
+struct mlx5dr_icm_mr {
+ u32 mkey;
+ struct mlx5dr_icm_dm dm;
+ struct mlx5dr_domain *dmn;
+ size_t length;
+ u64 icm_start_addr;
+};
+
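+/* Create an mkey covering the given device memory (SW ICM) region */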
+static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
+ u32 pd, u64 length, u64 start_addr, int mode,
+ u32 *mkey)
+{
+ u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, access_mode_1_0, mode);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) {
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ }
+
+ MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET(mkc, mkc, pd, pd);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
+
+ return mlx5_core_create_mkey(mdev, mkey, in, inlen);
+}
+
+u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk)
+{
+ u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);
+
+ return (u64)offset * chunk->seg;
+}
+
+u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk)
+{
+ return chunk->buddy_mem->icm_mr->mkey;
+}
+
+u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk)
+{
+ u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type);
+
+ return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg;
+}
+
+u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk)
+{
+ return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size,
+ chunk->buddy_mem->pool->icm_type);
+}
+
+u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk)
+{
+ return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size);
+}
+
+static struct mlx5dr_icm_mr *
+dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
+{
+ struct mlx5_core_dev *mdev = pool->dmn->mdev;
+ enum mlx5_sw_icm_type dm_type;
+ struct mlx5dr_icm_mr *icm_mr;
+ size_t log_align_base;
+ int err;
+
+ icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
+ if (!icm_mr)
+ return NULL;
+
+ icm_mr->dmn = pool->dmn;
+
+ icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type);
+
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ dm_type = MLX5_SW_ICM_TYPE_STEERING;
+ log_align_base = ilog2(icm_mr->dm.length);
+ } else {
+ dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ /* Align base is 64B */
+ log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
+ }
+ icm_mr->dm.type = dm_type;
+
+ err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
+ log_align_base, 0, &icm_mr->dm.addr,
+ &icm_mr->dm.obj_id);
+ if (err) {
+ mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
+ goto free_icm_mr;
+ }
+
+ /* Register device memory */
+ err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn,
+ icm_mr->dm.length,
+ icm_mr->dm.addr,
+ MLX5_MKC_ACCESS_MODE_SW_ICM,
+ &icm_mr->mkey);
+ if (err) {
+ mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err);
+ goto free_dm;
+ }
+
+ icm_mr->icm_start_addr = icm_mr->dm.addr;
+
+ if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) {
+ mlx5dr_err(pool->dmn, "Failed to get Aligned ICM mem (asked: %zu)\n",
+ log_align_base);
+ goto free_mkey;
+ }
+
+ return icm_mr;
+
+free_mkey:
+ mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
+free_dm:
+ mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
+ icm_mr->dm.addr, icm_mr->dm.obj_id);
+free_icm_mr:
+ kvfree(icm_mr);
+ return NULL;
+}
+
+static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
+{
+ struct mlx5_core_dev *mdev = icm_mr->dmn->mdev;
+ struct mlx5dr_icm_dm *dm = &icm_mr->dm;
+
+ mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
+ mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
+ dm->addr, dm->obj_id);
+ kvfree(icm_mr);
+}
+
+static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ /* We support only one type of STE size, both for ConnectX-5 and later
+ * devices. Once the support for match STE which has a larger tag is
+ * added (32B instead of 16B), the STE size for devices later than
+ * ConnectX-5 needs to account for that.
+ */
+ return DR_STE_SIZE_REDUCED;
+}
+
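+/* Point the chunk at its slice of the buddy's preallocated STE management arrays */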
+static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
+{
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+ int index = offset / DR_STE_SIZE;
+
+ chunk->ste_arr = &buddy->ste_arr[index];
+ chunk->miss_list = &buddy->miss_list[index];
+ chunk->hw_ste_arr = buddy->hw_ste_arr +
+ index * dr_icm_buddy_get_ste_size(buddy);
+}
+
+static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
+{
+ int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+
+ memset(chunk->hw_ste_arr, 0,
+ num_of_entries * dr_icm_buddy_get_ste_size(buddy));
+ memset(chunk->ste_arr, 0,
+ num_of_entries * sizeof(chunk->ste_arr[0]));
+}
+
+static enum mlx5dr_icm_type
+get_chunk_icm_type(struct mlx5dr_icm_chunk *chunk)
+{
+ return chunk->buddy_mem->pool->icm_type;
+}
+
+static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
+ struct mlx5dr_icm_buddy_mem *buddy)
+{
+ enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk);
+
+ buddy->used_memory -= mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+ list_del(&chunk->chunk_list);
+
+ if (icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_cleanup(chunk);
+
+ kvfree(chunk);
+}
+
+static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);
+
+ buddy->ste_arr = kvcalloc(num_of_entries,
+ sizeof(struct mlx5dr_ste), GFP_KERNEL);
+ if (!buddy->ste_arr)
+ return -ENOMEM;
+
+ /* Preallocate full STE size on non-ConnectX-5 devices since
+ * we need to support both full and reduced with the same cache.
+ */
+ buddy->hw_ste_arr = kvcalloc(num_of_entries,
+ dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
+ if (!buddy->hw_ste_arr)
+ goto free_ste_arr;
+
+ buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
+ if (!buddy->miss_list)
+ goto free_hw_ste_arr;
+
+ return 0;
+
+free_hw_ste_arr:
+ kvfree(buddy->hw_ste_arr);
+free_ste_arr:
+ kvfree(buddy->ste_arr);
+ return -ENOMEM;
+}
+
+static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ kvfree(buddy->ste_arr);
+ kvfree(buddy->hw_ste_arr);
+ kvfree(buddy->miss_list);
+}
+
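+/* Add a new buddy allocator to the pool, backed by a freshly allocated SW ICM region */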
+static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
+{
+ struct mlx5dr_icm_buddy_mem *buddy;
+ struct mlx5dr_icm_mr *icm_mr;
+
+ icm_mr = dr_icm_pool_mr_create(pool);
+ if (!icm_mr)
+ return -ENOMEM;
+
+ buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL);
+ if (!buddy)
+ goto free_mr;
+
+ if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz))
+ goto err_free_buddy;
+
+ buddy->icm_mr = icm_mr;
+ buddy->pool = pool;
+
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ /* Reduce allocations by preallocating and reusing the STE structures */
+ if (dr_icm_buddy_init_ste_cache(buddy))
+ goto err_cleanup_buddy;
+ }
+
+	/* Add the new buddy to the start of the list so it is searched first */
+ list_add(&buddy->list_node, &pool->buddy_mem_list);
+
+ return 0;
+
+err_cleanup_buddy:
+ mlx5dr_buddy_cleanup(buddy);
+err_free_buddy:
+ kvfree(buddy);
+free_mr:
+ dr_icm_pool_mr_destroy(icm_mr);
+ return -ENOMEM;
+}
+
+static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ struct mlx5dr_icm_chunk *chunk, *next;
+
+ list_for_each_entry_safe(chunk, next, &buddy->hot_list, chunk_list)
+ dr_icm_chunk_destroy(chunk, buddy);
+
+ list_for_each_entry_safe(chunk, next, &buddy->used_list, chunk_list)
+ dr_icm_chunk_destroy(chunk, buddy);
+
+ dr_icm_pool_mr_destroy(buddy->icm_mr);
+
+ mlx5dr_buddy_cleanup(buddy);
+
+ if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_buddy_cleanup_ste_cache(buddy);
+
+ kvfree(buddy);
+}
+
+static struct mlx5dr_icm_chunk *
+dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ struct mlx5dr_icm_buddy_mem *buddy_mem_pool,
+ unsigned int seg)
+{
+ struct mlx5dr_icm_chunk *chunk;
+ int offset;
+
+ chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL);
+ if (!chunk)
+ return NULL;
+
+ offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;
+
+ chunk->seg = seg;
+ chunk->size = chunk_size;
+ chunk->buddy_mem = buddy_mem_pool;
+
+ if (pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_init(chunk, offset);
+
+ buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+ INIT_LIST_HEAD(&chunk->chunk_list);
+
+	/* The chunk is now part of the used_list */
+ list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);
+
+ return chunk;
+}
+
+static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
+{
+ int allow_hot_size;
+
+ /* sync when hot memory reaches half of the pool size */
+ allow_hot_size =
+ mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) / 2;
+
+ return pool->hot_memory_size > allow_hot_size;
+}
+
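+/* Flush the HW steering cache, then return every hot chunk to its buddy
+ * allocator; STE buddies that become completely unused are destroyed.
+ */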
+static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
+{
+ struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
+ u32 num_entries;
+ int err;
+
+ err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
+ if (err) {
+ mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err);
+ return err;
+ }
+
+ list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) {
+ struct mlx5dr_icm_chunk *chunk, *tmp_chunk;
+
+ list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) {
+ num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
+ mlx5dr_buddy_free_mem(buddy, chunk->seg, ilog2(num_entries));
+ pool->hot_memory_size -= mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+ dr_icm_chunk_destroy(chunk, buddy);
+ }
+
+ if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_buddy_destroy(buddy);
+ }
+
+ return 0;
+}
+
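+/* Find a buddy allocator that can satisfy the requested chunk size,
+ * creating a new buddy if none of the existing ones has room.
+ */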
+static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ struct mlx5dr_icm_buddy_mem **buddy,
+ unsigned int *seg)
+{
+ struct mlx5dr_icm_buddy_mem *buddy_mem_pool;
+ bool new_mem = false;
+ int err;
+
+alloc_buddy_mem:
+ /* find the next free place from the buddy list */
+ list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) {
+ err = mlx5dr_buddy_alloc_mem(buddy_mem_pool,
+ chunk_size, seg);
+ if (!err)
+ goto found;
+
+ if (WARN_ON(new_mem)) {
+			/* A buddy was just created (first in the list), so allocation should not have failed */
+ mlx5dr_err(pool->dmn,
+ "No memory for order: %d\n",
+ chunk_size);
+ goto out;
+ }
+ }
+
+	/* No buddy in this pool has room, create a new one */
+ err = dr_icm_buddy_create(pool);
+ if (err) {
+ mlx5dr_err(pool->dmn,
+ "Failed creating buddy for order %d\n",
+ chunk_size);
+ goto out;
+ }
+
+	/* Mark that new memory was added, first in the list */
+ new_mem = true;
+ goto alloc_buddy_mem;
+
+found:
+ *buddy = buddy_mem_pool;
+out:
+ return err;
+}
+
+/* Allocate an ICM chunk. Each chunk holds a piece of ICM memory and also
+ * the memory used for HW STE management optimizations.
+ */
+struct mlx5dr_icm_chunk *
+mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size)
+{
+ struct mlx5dr_icm_chunk *chunk = NULL;
+ struct mlx5dr_icm_buddy_mem *buddy;
+ unsigned int seg;
+ int ret;
+
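+	/* A chunk can never be larger than a whole buddy allocation */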
+ if (chunk_size > pool->max_log_chunk_sz)
+ return NULL;
+
+ mutex_lock(&pool->mutex);
+ /* find mem, get back the relevant buddy pool and seg in that mem */
+ ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg);
+ if (ret)
+ goto out;
+
+ chunk = dr_icm_chunk_create(pool, chunk_size, buddy, seg);
+ if (!chunk)
+ goto out_err;
+
+ goto out;
+
+out_err:
+ mlx5dr_buddy_free_mem(buddy, seg, chunk_size);
+out:
+ mutex_unlock(&pool->mutex);
+ return chunk;
+}
+
+void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
+{
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+ struct mlx5dr_icm_pool *pool = buddy->pool;
+
+ /* move the memory to the waiting list AKA "hot" */
+ mutex_lock(&pool->mutex);
+ list_move_tail(&chunk->chunk_list, &buddy->hot_list);
+ pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk);
+
+ /* Check if we have chunks that are waiting for sync-ste */
+ if (dr_icm_pool_is_sync_required(pool))
+ dr_icm_pool_sync_all_buddy_pools(pool);
+
+ mutex_unlock(&pool->mutex);
+}
+
+struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
+ enum mlx5dr_icm_type icm_type)
+{
+ enum mlx5dr_icm_chunk_size max_log_chunk_sz;
+ struct mlx5dr_icm_pool *pool;
+
+ if (icm_type == DR_ICM_TYPE_STE)
+ max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
+ else
+ max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
+
+ pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
+
+ pool->dmn = dmn;
+ pool->icm_type = icm_type;
+ pool->max_log_chunk_sz = max_log_chunk_sz;
+
+ INIT_LIST_HEAD(&pool->buddy_mem_list);
+
+ mutex_init(&pool->mutex);
+
+ return pool;
+}
+
+void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool)
+{
+ struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
+
+ list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node)
+ dr_icm_buddy_destroy(buddy);
+
+ mutex_destroy(&pool->mutex);
+ kvfree(pool);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
new file mode 100644
index 000000000..0726848eb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -0,0 +1,1108 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->smac_47_16 || spec->smac_15_0);
+}
+
+static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->dmac_47_16 || spec->dmac_15_0);
+}
+
+static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->ip_protocol || spec->frag || spec->tcp_flags ||
+ spec->ip_ecn || spec->ip_dscp);
+}
+
+static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->tcp_sport || spec->tcp_dport ||
+ spec->udp_sport || spec->udp_dport);
+}
+
+static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->dst_ip_31_0 || spec->src_ip_31_0);
+}
+
+static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec)
+{
+ return (dr_mask_is_l3_base_set(spec) ||
+ dr_mask_is_tcp_udp_base_set(spec) ||
+ dr_mask_is_ipv4_set(spec));
+}
+
+static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc)
+{
+ return misc->vxlan_vni;
+}
+
+static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec)
+{
+ return spec->ttl_hoplimit;
+}
+
+static bool dr_mask_is_ipv4_ihl_set(struct mlx5dr_match_spec *spec)
+{
+ return spec->ipv4_ihl;
+}
+
+#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) ((_spec).first_vid || \
+ (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \
+ (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \
+ (_spec).ethertype || (_spec).ip_version || \
+ (_misc)._inner_outer##_second_vid || \
+ (_misc)._inner_outer##_second_cfi || \
+ (_misc)._inner_outer##_second_prio || \
+ (_misc)._inner_outer##_second_cvlan_tag || \
+ (_misc)._inner_outer##_second_svlan_tag)
+
+#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \
+ dr_mask_is_l3_base_set(&(_spec)) || \
+ dr_mask_is_tcp_udp_base_set(&(_spec)) || \
+ dr_mask_is_ttl_set(&(_spec)) || \
+ (_misc)._inner_outer##_ipv6_flow_label)
+
+#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \
+ (_misc3)._inner_outer##_tcp_seq_num || \
+ (_misc3)._inner_outer##_tcp_ack_num)
+
+#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, _inner_outer) ( \
+ (_misc2)._inner_outer##_first_mpls_label || \
+ (_misc2)._inner_outer##_first_mpls_exp || \
+ (_misc2)._inner_outer##_first_mpls_s_bos || \
+ (_misc2)._inner_outer##_first_mpls_ttl)
+
+static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc)
+{
+ return (misc->gre_key_h || misc->gre_key_l ||
+ misc->gre_protocol || misc->gre_c_present ||
+ misc->gre_k_present || misc->gre_s_present);
+}
+
+#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_gre_label || \
+ (_misc)->outer_first_mpls_over_gre_exp || \
+ (_misc)->outer_first_mpls_over_gre_s_bos || \
+ (_misc)->outer_first_mpls_over_gre_ttl)
+
+#define DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_udp_label || \
+ (_misc)->outer_first_mpls_over_udp_exp || \
+ (_misc)->outer_first_mpls_over_udp_s_bos || \
+ (_misc)->outer_first_mpls_over_udp_ttl)
+
+static bool
+dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3)
+{
+ return (misc3->outer_vxlan_gpe_vni ||
+ misc3->outer_vxlan_gpe_next_protocol ||
+ misc3->outer_vxlan_gpe_flags);
+}
+
+static bool
+dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps)
+{
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED);
+}
+
+static bool
+dr_mask_is_tnl_vxlan_gpe(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_mask_is_vxlan_gpe_set(&mask->misc3) &&
+ dr_matcher_supp_vxlan_gpe(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc)
+{
+ return misc->geneve_vni ||
+ misc->geneve_oam ||
+ misc->geneve_protocol_type ||
+ misc->geneve_opt_len;
+}
+
+static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3)
+{
+ return misc3->geneve_tlv_option_0_data;
+}
+
+static bool
+dr_matcher_supp_flex_parser_ok(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_parser_ok_bits_supp;
+}
+
+static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *misc,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_matcher_supp_flex_parser_ok(&dmn->info.caps) &&
+ misc->geneve_tlv_option_0_exist;
+}
+
+static bool
+dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps)
+{
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED);
+}
+
+static bool
+dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_mask_is_tnl_geneve_set(&mask->misc) &&
+ dr_matcher_supp_tnl_geneve(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_gtpu_set(struct mlx5dr_match_misc3 *misc3)
+{
+ return misc3->gtpu_msg_flags || misc3->gtpu_msg_type || misc3->gtpu_teid;
+}
+
+static bool dr_matcher_supp_tnl_gtpu(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_mask_is_tnl_gtpu_set(&mask->misc3) &&
+ dr_matcher_supp_tnl_gtpu(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_dw_0(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_dw_0(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_dw_0 &&
+ dr_matcher_supp_tnl_gtpu_dw_0(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_teid(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_teid(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_teid &&
+ dr_matcher_supp_tnl_gtpu_teid(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_dw_2(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_dw_2(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_dw_2 &&
+ dr_matcher_supp_tnl_gtpu_dw_2(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_first_ext(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_first_ext(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return mask->misc3.gtpu_first_ext_dw_0 &&
+ dr_matcher_supp_tnl_gtpu_first_ext(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_gtpu_flex_parser_0(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+ return (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_0) &&
+ dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) ||
+ (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_teid) &&
+ dr_mask_is_tnl_gtpu_teid(mask, dmn)) ||
+ (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_2) &&
+ dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) ||
+ (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_first_ext_dw_0) &&
+ dr_mask_is_tnl_gtpu_first_ext(mask, dmn));
+}
+
+static bool dr_mask_is_tnl_gtpu_flex_parser_1(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+ return (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_0) &&
+ dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) ||
+ (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_teid) &&
+ dr_mask_is_tnl_gtpu_teid(mask, dmn)) ||
+ (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_2) &&
+ dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) ||
+ (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_first_ext_dw_0) &&
+ dr_mask_is_tnl_gtpu_first_ext(mask, dmn));
+}
+
+static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return dr_mask_is_tnl_gtpu_flex_parser_0(mask, dmn) ||
+ dr_mask_is_tnl_gtpu_flex_parser_1(mask, dmn) ||
+ dr_mask_is_tnl_gtpu(mask, dmn);
+}
+
+static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps)
+{
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED);
+}
+
+static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps)
+{
+ return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
+ (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED);
+}
+
+static bool dr_mask_is_icmpv6_set(struct mlx5dr_match_misc3 *misc3)
+{
+ return (misc3->icmpv6_type || misc3->icmpv6_code ||
+ misc3->icmpv6_header_data);
+}
+
+static bool dr_mask_is_icmp(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ if (DR_MASK_IS_ICMPV4_SET(&mask->misc3))
+ return dr_matcher_supp_icmp_v4(&dmn->info.caps);
+ else if (dr_mask_is_icmpv6_set(&mask->misc3))
+ return dr_matcher_supp_icmp_v6(&dmn->info.caps);
+
+ return false;
+}
+
+static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2)
+{
+ return misc2->metadata_reg_a;
+}
+
+static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2)
+{
+ return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 ||
+ misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3);
+}
+
+static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2)
+{
+ return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 ||
+ misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7);
+}
+
+static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc)
+{
+ return (misc->source_sqn || misc->source_port);
+}
+
+static bool dr_mask_is_flex_parser_id_0_3_set(u32 flex_parser_id,
+ u32 flex_parser_value)
+{
+ if (flex_parser_id)
+ return flex_parser_id <= DR_STE_MAX_FLEX_0_ID;
+
+ /* Using flex_parser 0 means that id is zero, thus value must be set. */
+ return flex_parser_value;
+}
+
+static bool dr_mask_is_flex_parser_0_3_set(struct mlx5dr_match_misc4 *misc4)
+{
+ return (dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_0,
+ misc4->prog_sample_field_value_0) ||
+ dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_1,
+ misc4->prog_sample_field_value_1) ||
+ dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_2,
+ misc4->prog_sample_field_value_2) ||
+ dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_3,
+ misc4->prog_sample_field_value_3));
+}
+
+static bool dr_mask_is_flex_parser_id_4_7_set(u32 flex_parser_id)
+{
+ return flex_parser_id > DR_STE_MAX_FLEX_0_ID &&
+ flex_parser_id <= DR_STE_MAX_FLEX_1_ID;
+}
+
+static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4)
+{
+ return (dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_0) ||
+ dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_1) ||
+ dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_2) ||
+ dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3));
+}
+
+static int dr_matcher_supp_tnl_mpls_over_gre(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED;
+}
+
+static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(&mask->misc2) &&
+ dr_matcher_supp_tnl_mpls_over_gre(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED;
+}
+
+static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn)
+{
+ return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) &&
+ dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_header_0_1_set(struct mlx5dr_match_misc5 *misc5)
+{
+ return misc5->tunnel_header_0 || misc5->tunnel_header_1;
+}
+
+int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ enum mlx5dr_ipv outer_ipv,
+ enum mlx5dr_ipv inner_ipv)
+{
+ nic_matcher->ste_builder =
+ nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
+ nic_matcher->num_of_builders =
+ nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv];
+
+ if (!nic_matcher->num_of_builders) {
+ mlx5dr_dbg(matcher->tbl->dmn,
+ "Rule not supported on this matcher due to IP related fields\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
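+/* Translate the matcher mask into an ordered array of STE builders and
+ * verify that every mask field was consumed by some builder.
+ */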
+static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ enum mlx5dr_ipv outer_ipv,
+ enum mlx5dr_ipv inner_ipv)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
+ struct mlx5dr_match_param mask = {};
+ bool allow_empty_match = false;
+ struct mlx5dr_ste_build *sb;
+ bool inner, rx;
+ int idx = 0;
+ int ret, i;
+
+ sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
+ rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
+
+ /* Create a temporary mask to track and clear used mask fields */
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER)
+ mask.outer = matcher->mask.outer;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC)
+ mask.misc = matcher->mask.misc;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER)
+ mask.inner = matcher->mask.inner;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2)
+ mask.misc2 = matcher->mask.misc2;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3)
+ mask.misc3 = matcher->mask.misc3;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4)
+ mask.misc4 = matcher->mask.misc4;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC5)
+ mask.misc5 = matcher->mask.misc5;
+
+ ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
+ &matcher->mask, NULL);
+ if (ret)
+ return ret;
+
+ /* Optimize RX pipe by reducing source port match, since
+ * the FDB RX part is connected only to the wire.
+ */
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
+ rx && mask.misc.source_port) {
+ mask.misc.source_port = 0;
+ mask.misc.source_eswitch_owner_vhca_id = 0;
+ allow_empty_match = true;
+ }
+
+ /* Outer */
+ if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER |
+ DR_MATCHER_CRITERIA_MISC |
+ DR_MATCHER_CRITERIA_MISC2 |
+ DR_MATCHER_CRITERIA_MISC3 |
+ DR_MATCHER_CRITERIA_MISC5)) {
+ inner = false;
+
+ if (dr_mask_is_wqe_metadata_set(&mask.misc2))
+ mlx5dr_ste_build_general_purpose(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_reg_c_0_3_set(&mask.misc2))
+ mlx5dr_ste_build_register_0(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_reg_c_4_7_set(&mask.misc2))
+ mlx5dr_ste_build_register_1(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) &&
+ (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) {
+ mlx5dr_ste_build_src_gvmi_qpn(ste_ctx, &sb[idx++],
+ &mask, dmn, inner, rx);
+ }
+
+ if (dr_mask_is_smac_set(&mask.outer) &&
+ dr_mask_is_dmac_set(&mask.outer)) {
+ mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ }
+
+ if (dr_mask_is_smac_set(&mask.outer))
+ mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer))
+ mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (outer_ipv == DR_RULE_IPV6) {
+ if (DR_MASK_IS_DST_IP_SET(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_SRC_IP_SET(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer))
+ mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ } else {
+ if (dr_mask_is_ipv4_5_tuple_set(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_ttl_set(&mask.outer) ||
+ dr_mask_is_ipv4_ihl_set(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ }
+
+ if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn))
+ mlx5dr_ste_build_tnl_vxlan_gpe(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ else if (dr_mask_is_tnl_geneve(&mask, dmn)) {
+ mlx5dr_ste_build_tnl_geneve(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ if (dr_mask_is_tnl_geneve_tlv_opt(&mask.misc3))
+ mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ if (dr_mask_is_tnl_geneve_tlv_opt_exist_set(&mask.misc, dmn))
+ mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) {
+ if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn))
+ mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_tnl_gtpu_flex_parser_1(&mask, dmn))
+ mlx5dr_ste_build_tnl_gtpu_flex_parser_1(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_tnl_gtpu(&mask, dmn))
+ mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ } else if (dr_mask_is_tnl_header_0_1_set(&mask.misc5)) {
+ mlx5dr_ste_build_tnl_header_0_1(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ }
+
+ if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer))
+ mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer))
+ mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_icmp(&mask, dmn))
+ mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+
+ if (dr_mask_is_tnl_gre_set(&mask.misc))
+ mlx5dr_ste_build_tnl_gre(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ }
+
+ /* Inner */
+ if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER |
+ DR_MATCHER_CRITERIA_MISC |
+ DR_MATCHER_CRITERIA_MISC2 |
+ DR_MATCHER_CRITERIA_MISC3)) {
+ inner = true;
+
+ if (dr_mask_is_eth_l2_tnl_set(&mask.misc))
+ mlx5dr_ste_build_eth_l2_tnl(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_smac_set(&mask.inner) &&
+ dr_mask_is_dmac_set(&mask.inner)) {
+ mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ }
+
+ if (dr_mask_is_smac_set(&mask.inner))
+ mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner))
+ mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (inner_ipv == DR_RULE_IPV6) {
+ if (DR_MASK_IS_DST_IP_SET(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_SRC_IP_SET(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner))
+ mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ } else {
+ if (dr_mask_is_ipv4_5_tuple_set(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_ttl_set(&mask.inner) ||
+ dr_mask_is_ipv4_ihl_set(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+ }
+
+ if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner))
+ mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner))
+ mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+ if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn))
+ mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ }
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+ if (dr_mask_is_flex_parser_0_3_set(&mask.misc4))
+ mlx5dr_ste_build_flex_parser_0(ste_ctx, &sb[idx++],
+ &mask, false, rx);
+
+ if (dr_mask_is_flex_parser_4_7_set(&mask.misc4))
+ mlx5dr_ste_build_flex_parser_1(ste_ctx, &sb[idx++],
+ &mask, false, rx);
+ }
+
+ /* Empty matcher, takes all */
+ if ((!idx && allow_empty_match) ||
+ matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
+ mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx);
+
+ if (idx == 0) {
+ mlx5dr_err(dmn, "Cannot generate any valid rules from mask\n");
+ return -EINVAL;
+ }
+
+ /* Check that all mask fields were consumed */
+ for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) {
+ if (((u8 *)&mask)[i] != 0) {
+ mlx5dr_dbg(dmn, "Mask contains unsupported parameters\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ nic_matcher->ste_builder = sb;
+ nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv] = idx;
+
+ return 0;
+}
+
+static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *curr_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+{
+ struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl;
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *prev_htbl;
+ int ret;
+
+ /* Connect end anchor hash table to next_htbl or to the default address */
+ if (next_nic_matcher) {
+ info.type = CONNECT_HIT;
+ info.hit_next_htbl = next_nic_matcher->s_htbl;
+ } else {
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_tbl->default_icm_addr;
+ }
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+ curr_nic_matcher->e_anchor,
+ &info, info.type == CONNECT_HIT);
+ if (ret)
+ return ret;
+
+ /* Connect start hash table to end anchor */
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(curr_nic_matcher->e_anchor->chunk);
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+ curr_nic_matcher->s_htbl,
+ &info, false);
+ if (ret)
+ return ret;
+
+ /* Connect previous hash table to matcher start hash table */
+ if (prev_nic_matcher)
+ prev_htbl = prev_nic_matcher->e_anchor;
+ else
+ prev_htbl = nic_tbl->s_anchor;
+
+ info.type = CONNECT_HIT;
+ info.hit_next_htbl = curr_nic_matcher->s_htbl;
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl,
+ &info, true);
+ if (ret)
+ return ret;
+
+ /* Update the pointing ste and next hash table */
+ curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->chunk->ste_arr;
+ prev_htbl->chunk->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl;
+
+ if (next_nic_matcher) {
+ next_nic_matcher->s_htbl->pointing_ste =
+ curr_nic_matcher->e_anchor->chunk->ste_arr;
+ curr_nic_matcher->e_anchor->chunk->ste_arr[0].next_htbl =
+ next_nic_matcher->s_htbl;
+ }
+
+ return 0;
+}
+
+int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher, *prev_nic_matcher, *tmp_nic_matcher;
+ struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl;
+ bool first = true;
+ int ret;
+
+ /* If the nic matcher is already on its parent nic table list,
+ * then it is already connected to the chain of nic matchers.
+ */
+ if (!list_empty(&nic_matcher->list_node))
+ return 0;
+
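+	/* The first matcher with priority >= ours becomes our next matcher */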
+ next_nic_matcher = NULL;
+ list_for_each_entry(tmp_nic_matcher, &nic_tbl->nic_matcher_list, list_node) {
+ if (tmp_nic_matcher->prio >= nic_matcher->prio) {
+ next_nic_matcher = tmp_nic_matcher;
+ break;
+ }
+ first = false;
+ }
+
+ prev_nic_matcher = NULL;
+ if (next_nic_matcher && !first)
+ prev_nic_matcher = list_prev_entry(next_nic_matcher, list_node);
+ else if (!first)
+ prev_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list,
+ struct mlx5dr_matcher_rx_tx,
+ list_node);
+
+ ret = dr_nic_matcher_connect(dmn, nic_matcher,
+ next_nic_matcher, prev_nic_matcher);
+ if (ret)
+ return ret;
+
+ if (prev_nic_matcher)
+ list_add(&nic_matcher->list_node, &prev_nic_matcher->list_node);
+ else if (next_nic_matcher)
+ list_add_tail(&nic_matcher->list_node, &next_nic_matcher->list_node);
+ else
+ list_add(&nic_matcher->list_node, &nic_matcher->nic_tbl->nic_matcher_list);
+
+ return ret;
+}
+
+static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ mlx5dr_htbl_put(nic_matcher->s_htbl);
+ mlx5dr_htbl_put(nic_matcher->e_anchor);
+}
+
+static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher)
+{
+ dr_matcher_uninit_nic(&matcher->rx);
+ dr_matcher_uninit_nic(&matcher->tx);
+}
+
+static void dr_matcher_uninit(struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ dr_matcher_uninit_nic(&matcher->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ dr_matcher_uninit_nic(&matcher->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ dr_matcher_uninit_fdb(matcher);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+}
+
+static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+
+ dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV4);
+ dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV6);
+ dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV4);
+ dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6);
+
+ if (!nic_matcher->ste_builder) {
+ mlx5dr_err(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ int ret;
+
+ nic_matcher->prio = matcher->prio;
+ INIT_LIST_HEAD(&nic_matcher->list_node);
+
+ ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher);
+ if (ret)
+ return ret;
+
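+	/* The end anchor is a single don't-care STE that ends this matcher's chain */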
+ nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ MLX5DR_STE_LU_TYPE_DONT_CARE,
+ 0);
+ if (!nic_matcher->e_anchor)
+ return -ENOMEM;
+
+ nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ nic_matcher->ste_builder[0].lu_type,
+ nic_matcher->ste_builder[0].byte_mask);
+ if (!nic_matcher->s_htbl) {
+ ret = -ENOMEM;
+ goto free_e_htbl;
+ }
+
+	/* Keep the tables alive (referenced) even while they are empty */
+ mlx5dr_htbl_get(nic_matcher->s_htbl);
+ mlx5dr_htbl_get(nic_matcher->e_anchor);
+
+ return 0;
+
+free_e_htbl:
+ mlx5dr_ste_htbl_free(nic_matcher->e_anchor);
+ return ret;
+}
+
+static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher)
+{
+ int ret;
+
+ ret = dr_matcher_init_nic(matcher, &matcher->rx);
+ if (ret)
+ return ret;
+
+ ret = dr_matcher_init_nic(matcher, &matcher->tx);
+ if (ret)
+ goto uninit_nic_rx;
+
+ return 0;
+
+uninit_nic_rx:
+ dr_matcher_uninit_nic(&matcher->rx);
+ return ret;
+}
+
+static int dr_matcher_copy_param(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *mask)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_match_parameters consumed_mask;
+ int i, ret = 0;
+
+ if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) {
+ mlx5dr_err(dmn, "Invalid match criteria attribute\n");
+ return -EINVAL;
+ }
+
+ if (mask) {
+ if (mask->match_sz > DR_SZ_MATCH_PARAM) {
+ mlx5dr_err(dmn, "Invalid match size attribute\n");
+ return -EINVAL;
+ }
+
+ consumed_mask.match_buf = kzalloc(mask->match_sz, GFP_KERNEL);
+ if (!consumed_mask.match_buf)
+ return -ENOMEM;
+
+ consumed_mask.match_sz = mask->match_sz;
+ memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz);
+ mlx5dr_ste_copy_param(matcher->match_criteria,
+ &matcher->mask, &consumed_mask, true);
+
+ /* Check that all mask data was consumed */
+ for (i = 0; i < consumed_mask.match_sz; i++) {
+ if (!((u8 *)consumed_mask.match_buf)[i])
+ continue;
+
+ mlx5dr_dbg(dmn,
+ "Match param mask contains unsupported parameters\n");
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ kfree(consumed_mask.match_buf);
+ }
+
+ return ret;
+}
+
+static int dr_matcher_init(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *mask)
+{
+ struct mlx5dr_table *tbl = matcher->tbl;
+ struct mlx5dr_domain *dmn = tbl->dmn;
+ int ret;
+
+ ret = dr_matcher_copy_param(matcher, mask);
+ if (ret)
+ return ret;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ matcher->rx.nic_tbl = &tbl->rx;
+ ret = dr_matcher_init_nic(matcher, &matcher->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ matcher->tx.nic_tbl = &tbl->tx;
+ ret = dr_matcher_init_nic(matcher, &matcher->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ matcher->rx.nic_tbl = &tbl->rx;
+ matcher->tx.nic_tbl = &tbl->tx;
+ ret = dr_matcher_init_fdb(matcher);
+ break;
+ default:
+ WARN_ON(true);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void dr_matcher_add_to_dbg_list(struct mlx5dr_matcher *matcher)
+{
+ mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+ list_add(&matcher->list_node, &matcher->tbl->matcher_list);
+ mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+}
+
+static void dr_matcher_remove_from_dbg_list(struct mlx5dr_matcher *matcher)
+{
+ mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+ list_del(&matcher->list_node);
+ mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex);
+}
+
+struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *tbl,
+ u32 priority,
+ u8 match_criteria_enable,
+ struct mlx5dr_match_parameters *mask)
+{
+ struct mlx5dr_matcher *matcher;
+ int ret;
+
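+	/* Hold the table for as long as the matcher exists */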
+ refcount_inc(&tbl->refcount);
+
+ matcher = kzalloc(sizeof(*matcher), GFP_KERNEL);
+ if (!matcher)
+ goto dec_ref;
+
+ matcher->tbl = tbl;
+ matcher->prio = priority;
+ matcher->match_criteria = match_criteria_enable;
+ refcount_set(&matcher->refcount, 1);
+ INIT_LIST_HEAD(&matcher->list_node);
+ INIT_LIST_HEAD(&matcher->dbg_rule_list);
+
+ mlx5dr_domain_lock(tbl->dmn);
+
+ ret = dr_matcher_init(matcher, mask);
+ if (ret)
+ goto free_matcher;
+
+ dr_matcher_add_to_dbg_list(matcher);
+
+ mlx5dr_domain_unlock(tbl->dmn);
+
+ return matcher;
+
+free_matcher:
+ mlx5dr_domain_unlock(tbl->dmn);
+ kfree(matcher);
+dec_ref:
+ refcount_dec(&tbl->refcount);
+ return NULL;
+}
+
+static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *prev_anchor;
+
+ if (prev_nic_matcher)
+ prev_anchor = prev_nic_matcher->e_anchor;
+ else
+ prev_anchor = nic_tbl->s_anchor;
+
+ /* Connect previous anchor hash table to next matcher or to the default address */
+ if (next_nic_matcher) {
+ info.type = CONNECT_HIT;
+ info.hit_next_htbl = next_nic_matcher->s_htbl;
+ next_nic_matcher->s_htbl->pointing_ste = prev_anchor->chunk->ste_arr;
+ prev_anchor->chunk->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+ } else {
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_tbl->default_icm_addr;
+ prev_anchor->chunk->ste_arr[0].next_htbl = NULL;
+ }
+
+ return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor,
+ &info, true);
+}
+
+int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher, *next_nic_matcher;
+ struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl;
+ int ret;
+
+ /* If the nic matcher is not on its parent nic table list,
+ * then it is detached - no need to disconnect it.
+ */
+ if (list_empty(&nic_matcher->list_node))
+ return 0;
+
+ if (list_is_last(&nic_matcher->list_node, &nic_tbl->nic_matcher_list))
+ next_nic_matcher = NULL;
+ else
+ next_nic_matcher = list_next_entry(nic_matcher, list_node);
+
+ if (nic_matcher->list_node.prev == &nic_tbl->nic_matcher_list)
+ prev_nic_matcher = NULL;
+ else
+ prev_nic_matcher = list_prev_entry(nic_matcher, list_node);
+
+ ret = dr_matcher_disconnect_nic(dmn, nic_tbl, next_nic_matcher, prev_nic_matcher);
+ if (ret)
+ return ret;
+
+ list_del_init(&nic_matcher->list_node);
+ return 0;
+}
+
+int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_table *tbl = matcher->tbl;
+
+ if (WARN_ON_ONCE(refcount_read(&matcher->refcount) > 1))
+ return -EBUSY;
+
+ mlx5dr_domain_lock(tbl->dmn);
+
+ dr_matcher_remove_from_dbg_list(matcher);
+ dr_matcher_uninit(matcher);
+ refcount_dec(&matcher->tbl->refcount);
+
+ mlx5dr_domain_unlock(tbl->dmn);
+ kfree(matcher);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
new file mode 100644
index 000000000..91ff19f67
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -0,0 +1,1334 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES)
+
+static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste *new_last_ste,
+ struct list_head *miss_list,
+ struct list_head *send_list)
+{
+ struct mlx5dr_ste_send_info *ste_info_last;
+ struct mlx5dr_ste *last_ste;
+
+ /* The new entry will be inserted after the last */
+ last_ste = list_last_entry(miss_list, struct mlx5dr_ste, miss_list_node);
+ WARN_ON(!last_ste);
+
+ ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL);
+ if (!ste_info_last)
+ return -ENOMEM;
+
+ mlx5dr_ste_set_miss_addr(ste_ctx, mlx5dr_ste_get_hw_ste(last_ste),
+ mlx5dr_ste_get_icm_addr(new_last_ste));
+ list_add_tail(&new_last_ste->miss_list_node, miss_list);
+
+ mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL,
+ 0, mlx5dr_ste_get_hw_ste(last_ste),
+ ste_info_last, send_list, true);
+
+ return 0;
+}
+
+static struct mlx5dr_ste *
+dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 *hw_ste)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
+ struct mlx5dr_ste_htbl *new_htbl;
+ struct mlx5dr_ste *ste;
+ u64 icm_addr;
+
+ /* Create new table for miss entry */
+ new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ MLX5DR_STE_LU_TYPE_DONT_CARE,
+ 0);
+ if (!new_htbl) {
+ mlx5dr_dbg(dmn, "Failed allocating collision table\n");
+ return NULL;
+ }
+
+ /* One and only entry, never grows */
+ ste = new_htbl->chunk->ste_arr;
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr);
+ mlx5dr_htbl_get(new_htbl);
+
+ return ste;
+}
+
+static struct mlx5dr_ste *
+dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 *hw_ste,
+ struct mlx5dr_ste *orig_ste)
+{
+ struct mlx5dr_ste *ste;
+
+ ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste);
+ if (!ste) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n");
+ return NULL;
+ }
+
+ ste->ste_chain_location = orig_ste->ste_chain_location;
+ ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste;
+
+ /* In collision entry, all members share the same miss_list_head */
+ ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
+
+ /* Next table */
+ if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste,
+ DR_CHUNK_SIZE_1)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n");
+ goto free_tbl;
+ }
+
+ return ste;
+
+free_tbl:
+ mlx5dr_ste_free(ste, matcher, nic_matcher);
+ return NULL;
+}
+
+static int
+dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info,
+ struct mlx5dr_domain *dmn)
+{
+ int ret;
+
+ list_del(&ste_info->send_list);
+
+	/* Copy the data to the STE: only the reduced size or the control part;
+	 * the last 16B (the mask) are already written to the HW.
+	 */
+ if (ste_info->size == DR_STE_SIZE_CTRL)
+ memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste),
+ ste_info->data, DR_STE_SIZE_CTRL);
+ else
+ memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste),
+ ste_info->data, DR_STE_SIZE_REDUCED);
+
+ ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data,
+ ste_info->size, ste_info->offset);
+
+ kfree(ste_info);
+ return ret;
+}
+
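+/* Post all pending STE writes in the list to the HW, optionally in reverse order */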
+static int dr_rule_send_update_list(struct list_head *send_ste_list,
+ struct mlx5dr_domain *dmn,
+ bool is_reverse)
+{
+ struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info;
+ int ret;
+
+ if (is_reverse) {
+ list_for_each_entry_safe_reverse(ste_info, tmp_ste_info,
+ send_ste_list, send_list) {
+ ret = dr_rule_handle_one_ste_in_update_list(ste_info,
+ dmn);
+ if (ret)
+ return ret;
+ }
+ } else {
+ list_for_each_entry_safe(ste_info, tmp_ste_info,
+ send_ste_list, send_list) {
+ ret = dr_rule_handle_one_ste_in_update_list(ste_info,
+ dmn);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static struct mlx5dr_ste *
+dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste)
+{
+ struct mlx5dr_ste *ste;
+
+ if (list_empty(miss_list))
+ return NULL;
+
+ /* Check if hw_ste is present in the list */
+ list_for_each_entry(ste, miss_list, miss_list_node) {
+ if (mlx5dr_ste_equal_tag(mlx5dr_ste_get_hw_ste(ste), hw_ste))
+ return ste;
+ }
+
+ return NULL;
+}
+
+static struct mlx5dr_ste *
+dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct list_head *update_list,
+ struct mlx5dr_ste *col_ste,
+ u8 *hw_ste)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste *new_ste;
+ int ret;
+
+ new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste);
+ if (!new_ste)
+ return NULL;
+
+ /* Update collision pointing STE */
+ new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste;
+
+ /* In collision entry, all members share the same miss_list_head */
+ new_ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(col_ste);
+
+ /* Update the previous from the list */
+ ret = dr_rule_append_to_miss_list(dmn->ste_ctx, new_ste,
+ mlx5dr_ste_get_miss_list(col_ste),
+ update_list);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed update dup entry\n");
+ goto err_exit;
+ }
+
+ return new_ste;
+
+err_exit:
+ mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+ return NULL;
+}
+
+static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *cur_ste,
+ struct mlx5dr_ste *new_ste)
+{
+ new_ste->next_htbl = cur_ste->next_htbl;
+ new_ste->ste_chain_location = cur_ste->ste_chain_location;
+
+ if (new_ste->next_htbl)
+ new_ste->next_htbl->pointing_ste = new_ste;
+
+ /* We need to copy the refcount since this ste
+ * may have been traversed several times
+ */
+ new_ste->refcount = cur_ste->refcount;
+
+ /* Link old STEs rule to the new ste */
+ mlx5dr_rule_set_last_member(cur_ste->rule_rx_tx, new_ste, false);
+}
+
+static struct mlx5dr_ste *
+dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *cur_ste,
+ struct mlx5dr_ste_htbl *new_htbl,
+ struct list_head *update_list)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_send_info *ste_info;
+ bool use_update_list = false;
+ u8 hw_ste[DR_STE_SIZE] = {};
+ struct mlx5dr_ste *new_ste;
+ u64 icm_addr;
+ int new_idx;
+ u8 sb_idx;
+
+ /* Copy STE mask from the matcher */
+ sb_idx = cur_ste->ste_chain_location - 1;
+ mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask);
+
+ /* Copy STE control and tag */
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED);
+ mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
+
+ new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl);
+ new_ste = &new_htbl->chunk->ste_arr[new_idx];
+
+ if (mlx5dr_ste_is_not_used(new_ste)) {
+ mlx5dr_htbl_get(new_htbl);
+ list_add_tail(&new_ste->miss_list_node,
+ mlx5dr_ste_get_miss_list(new_ste));
+ } else {
+ new_ste = dr_rule_rehash_handle_collision(matcher,
+ nic_matcher,
+ update_list,
+ new_ste,
+ hw_ste);
+ if (!new_ste) {
+ mlx5dr_dbg(dmn, "Failed adding collision entry, index: %d\n",
+ new_idx);
+ return NULL;
+ }
+ new_htbl->ctrl.num_of_collisions++;
+ use_update_list = true;
+ }
+
+ memcpy(mlx5dr_ste_get_hw_ste(new_ste), hw_ste, DR_STE_SIZE_REDUCED);
+
+ new_htbl->ctrl.num_of_valid_entries++;
+
+ if (use_update_list) {
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ goto err_exit;
+
+ mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0,
+ hw_ste, ste_info,
+ update_list, true);
+ }
+
+ dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste);
+
+ return new_ste;
+
+err_exit:
+ mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+ return NULL;
+}
+
+static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct list_head *cur_miss_list,
+ struct mlx5dr_ste_htbl *new_htbl,
+ struct list_head *update_list)
+{
+ struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste;
+
+ if (list_empty(cur_miss_list))
+ return 0;
+
+ list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) {
+ new_ste = dr_rule_rehash_copy_ste(matcher,
+ nic_matcher,
+ cur_ste,
+ new_htbl,
+ update_list);
+ if (!new_ste)
+ goto err_insert;
+
+ list_del(&cur_ste->miss_list_node);
+ mlx5dr_htbl_put(cur_ste->htbl);
+ }
+ return 0;
+
+err_insert:
+ mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n");
+ WARN_ON(true);
+ return -EINVAL;
+}
+
+static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ struct mlx5dr_ste_htbl *new_htbl,
+ struct list_head *update_list)
+{
+ struct mlx5dr_ste *cur_ste;
+ int cur_entries;
+ int err = 0;
+ int i;
+
+ cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk->size);
+
+ if (cur_entries < 1) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < cur_entries; i++) {
+ cur_ste = &cur_htbl->chunk->ste_arr[i];
+ if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */
+ continue;
+
+ err = dr_rule_rehash_copy_miss_list(matcher,
+ nic_matcher,
+ mlx5dr_ste_get_miss_list(cur_ste),
+ new_htbl,
+ update_list);
+ if (err)
+ goto clean_copy;
+ }
+
+clean_copy:
+ return err;
+}
+
+static struct mlx5dr_ste_htbl *
+dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ u8 ste_location,
+ struct list_head *update_list,
+ enum mlx5dr_icm_chunk_size new_size)
+{
+ struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info;
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_ste_send_info *ste_info;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ u8 formatted_ste[DR_STE_SIZE] = {};
+ LIST_HEAD(rehash_table_send_list);
+ struct mlx5dr_ste *ste_to_update;
+ struct mlx5dr_ste_htbl *new_htbl;
+ int err;
+
+ nic_matcher = nic_rule->nic_matcher;
+ nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ return NULL;
+
+ new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ new_size,
+ cur_htbl->lu_type,
+ cur_htbl->byte_mask);
+ if (!new_htbl) {
+ mlx5dr_err(dmn, "Failed to allocate new hash table\n");
+ goto free_ste_info;
+ }
+
+ /* Write new table to HW */
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
+ dmn->info.caps.gvmi,
+ nic_dmn->type,
+ new_htbl,
+ formatted_ste,
+ &info);
+
+ new_htbl->pointing_ste = cur_htbl->pointing_ste;
+ new_htbl->pointing_ste->next_htbl = new_htbl;
+ err = dr_rule_rehash_copy_htbl(matcher,
+ nic_matcher,
+ cur_htbl,
+ new_htbl,
+ &rehash_table_send_list);
+ if (err)
+ goto free_new_htbl;
+
+ if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste,
+ nic_matcher->ste_builder[ste_location - 1].bit_mask)) {
+ mlx5dr_err(dmn, "Failed writing table to HW\n");
+ goto free_new_htbl;
+ }
+
+ /* Writing to the HW is done in the regular order of rehash_table_send_list,
+ * so that the original data is written before the miss addresses of any
+ * collision entries.
+ */
+ if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) {
+ mlx5dr_err(dmn, "Failed updating table to HW\n");
+ goto free_ste_list;
+ }
+
+ /* Connect previous hash table to current */
+ if (ste_location == 1) {
+ /* The previous table is an anchor; an anchor's size is always one STE */
+ struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl;
+
+ /* On matcher s_anchor we keep an extra refcount */
+ mlx5dr_htbl_get(new_htbl);
+ mlx5dr_htbl_put(cur_htbl);
+
+ nic_matcher->s_htbl = new_htbl;
+
+ /* It is safe to operate dr_ste_set_hit_addr on the hw_ste here
+ * (48B long), since it only modifies the first 32B.
+ */
+ mlx5dr_ste_set_hit_addr(dmn->ste_ctx,
+ prev_htbl->chunk->hw_ste_arr,
+ mlx5dr_icm_pool_get_chunk_icm_addr(new_htbl->chunk),
+ mlx5dr_icm_pool_get_chunk_num_of_entries(new_htbl->chunk));
+
+ ste_to_update = &prev_htbl->chunk->ste_arr[0];
+ } else {
+ mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx,
+ mlx5dr_ste_get_hw_ste(cur_htbl->pointing_ste),
+ new_htbl);
+ ste_to_update = cur_htbl->pointing_ste;
+ }
+
+ mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL,
+ 0, mlx5dr_ste_get_hw_ste(ste_to_update),
+ ste_info, update_list, false);
+
+ return new_htbl;
+
+free_ste_list:
+ /* Clean all ste_info's from the new table */
+ list_for_each_entry_safe(del_ste_info, tmp_ste_info,
+ &rehash_table_send_list, send_list) {
+ list_del(&del_ste_info->send_list);
+ kfree(del_ste_info);
+ }
+
+free_new_htbl:
+ mlx5dr_ste_htbl_free(new_htbl);
+free_ste_info:
+ kfree(ste_info);
+ mlx5dr_info(dmn, "Failed creating rehash table\n");
+ return NULL;
+}
+
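+/* Grow the rule's current hash table to the next chunk size, capped at the
+ * domain's max SW ICM size. Returns NULL when the table is already at the
+ * maximum size and no rehash is performed.
+ */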
+static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ u8 ste_location,
+ struct list_head *update_list)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+ enum mlx5dr_icm_chunk_size new_size;
+
+ new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk->size);
+ new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz);
+
+ if (new_size == cur_htbl->chunk->size)
+ return NULL; /* Skip rehash, we are already at the max size */
+
+ return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location,
+ update_list, new_size);
+}
+
+static struct mlx5dr_ste *
+dr_rule_handle_collision(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ u8 *hw_ste,
+ struct list_head *miss_list,
+ struct list_head *send_list)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
+ struct mlx5dr_ste_send_info *ste_info;
+ struct mlx5dr_ste *new_ste;
+
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ return NULL;
+
+ new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste);
+ if (!new_ste)
+ goto free_send_info;
+
+ if (dr_rule_append_to_miss_list(ste_ctx, new_ste,
+ miss_list, send_list)) {
+ mlx5dr_dbg(dmn, "Failed to update prev miss_list\n");
+ goto err_exit;
+ }
+
+ mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste,
+ ste_info, send_list, false);
+
+ ste->htbl->ctrl.num_of_collisions++;
+ ste->htbl->ctrl.num_of_valid_entries++;
+
+ return new_ste;
+
+err_exit:
+ mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+free_send_info:
+ kfree(ste_info);
+ return NULL;
+}
+
+static void dr_rule_remove_action_members(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_rule_action_member *action_mem;
+ struct mlx5dr_rule_action_member *tmp;
+
+ list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) {
+ list_del(&action_mem->list);
+ refcount_dec(&action_mem->action->refcount);
+ kvfree(action_mem);
+ }
+}
+
+static int dr_rule_add_action_members(struct mlx5dr_rule *rule,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_rule_action_member *action_mem;
+ int i;
+
+ for (i = 0; i < num_actions; i++) {
+ action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL);
+ if (!action_mem)
+ goto free_action_members;
+
+ action_mem->action = actions[i];
+ INIT_LIST_HEAD(&action_mem->list);
+ list_add_tail(&action_mem->list, &rule->rule_actions_list);
+ refcount_inc(&action_mem->action->refcount);
+ }
+
+ return 0;
+
+free_action_members:
+ dr_rule_remove_action_members(rule);
+ return -ENOMEM;
+}
+
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste *ste,
+ bool force)
+{
+ /* Updating the rule member is usually done for the last STE, or during
+ * rule creation to recover from a mid-creation failure (the force flag
+ * is used for this purpose).
+ */
+ if (ste->next_htbl && !force)
+ return;
+
+ /* Update is required since each rule keeps track of its last STE */
+ ste->rule_rx_tx = nic_rule;
+ nic_rule->last_rule_ste = ste;
+}
+
+static struct mlx5dr_ste *dr_rule_get_pointed_ste(struct mlx5dr_ste *curr_ste)
+{
+ struct mlx5dr_ste *first_ste;
+
+ first_ste = list_first_entry(mlx5dr_ste_get_miss_list(curr_ste),
+ struct mlx5dr_ste, miss_list_node);
+
+ return first_ste->htbl->pointing_ste;
+}
+
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+ struct mlx5dr_ste *curr_ste,
+ int *num_of_stes)
+{
+ bool first = false;
+
+ *num_of_stes = 0;
+
+ if (!curr_ste)
+ return -ENOENT;
+
+ /* Iterate from last to first */
+ while (!first) {
+ first = curr_ste->ste_chain_location == 1;
+ ste_arr[*num_of_stes] = curr_ste;
+ *num_of_stes += 1;
+ curr_ste = dr_rule_get_pointed_ste(curr_ste);
+ }
+
+ return 0;
+}
+
+static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule)
+{
+ struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
+ struct mlx5dr_ste *curr_ste = nic_rule->last_rule_ste;
+ int i;
+
+ if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i))
+ return;
+
+ while (i--)
+ mlx5dr_ste_put(ste_arr[i], rule->matcher, nic_rule->nic_matcher);
+}
+
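+/* Count the set bits in the byte mask (each loop iteration clears the
+ * lowest set bit - Kernighan's method).
+ */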
+static u16 dr_get_bits_per_mask(u16 byte_mask)
+{
+ u16 bits = 0;
+
+ while (byte_mask) {
+ byte_mask = byte_mask & (byte_mask - 1);
+ bits++;
+ }
+
+ return bits;
+}
+
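+/* Enlarge the hash table only if it has not reached the domain's max SW ICM
+ * size, it is still allowed to grow, the byte mask provides more hash bits
+ * than the current table size already uses, and both the collisions and the
+ * non-colliding valid entries crossed the increase threshold.
+ */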
+static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
+ struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+ struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+ int threshold;
+
+ if (dmn->info.max_log_sw_icm_sz <= htbl->chunk->size)
+ return false;
+
+ if (!mlx5dr_ste_htbl_may_grow(htbl))
+ return false;
+
+ if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk->size)
+ return false;
+
+ threshold = mlx5dr_ste_htbl_increase_threshold(htbl);
+ if (ctrl->num_of_collisions >= threshold &&
+ (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= threshold)
+ return true;
+
+ return false;
+}
+
+static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste *last_ste,
+ u8 *hw_ste_arr,
+ u32 new_hw_ste_arr_sz)
+{
+ struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher;
+ struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES];
+ u8 num_of_builders = nic_matcher->num_of_builders;
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ u8 *curr_hw_ste, *prev_hw_ste;
+ struct mlx5dr_ste *action_ste;
+ int i, k;
+
+ /* Two cases:
+ * 1. num_of_builders is equal to new_hw_ste_arr_sz: the actions fit in
+ * the existing STEs.
+ * 2. num_of_builders is less than new_hw_ste_arr_sz: new STEs were added
+ * to support the actions.
+ */
+
+ for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) {
+ curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE;
+ prev_hw_ste = (i == 0) ? curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE);
+ action_ste = dr_rule_create_collision_htbl(matcher,
+ nic_matcher,
+ curr_hw_ste);
+ if (!action_ste)
+ return -ENOMEM;
+
+ mlx5dr_ste_get(action_ste);
+
+ action_ste->htbl->pointing_ste = last_ste;
+ last_ste->next_htbl = action_ste->htbl;
+ last_ste = action_ste;
+
+ /* While freeing an STE we go over its miss list, so add this STE to the list */
+ list_add_tail(&action_ste->miss_list_node,
+ mlx5dr_ste_get_miss_list(action_ste));
+
+ ste_info_arr[k] = kzalloc(sizeof(*ste_info_arr[k]),
+ GFP_KERNEL);
+ if (!ste_info_arr[k])
+ goto err_exit;
+
+ /* Point current ste to the new action */
+ mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx,
+ prev_hw_ste,
+ action_ste->htbl);
+
+ mlx5dr_rule_set_last_member(nic_rule, action_ste, true);
+
+ mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0,
+ curr_hw_ste,
+ ste_info_arr[k],
+ send_ste_list, false);
+ }
+
+ last_ste->next_htbl = NULL;
+
+ return 0;
+
+err_exit:
+ mlx5dr_ste_put(action_ste, matcher, nic_matcher);
+ return -ENOMEM;
+}
+
+static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ struct mlx5dr_ste *ste,
+ u8 ste_location,
+ u8 *hw_ste,
+ struct list_head *miss_list,
+ struct list_head *send_list)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_send_info *ste_info;
+ u64 icm_addr;
+
+ /* Take a reference on the table, only the first time this STE is used */
+ mlx5dr_htbl_get(cur_htbl);
+
+ /* new entry -> new branch */
+ list_add_tail(&ste->miss_list_node, miss_list);
+
+ icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
+
+ ste->ste_chain_location = ste_location;
+
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ goto clean_ste_setting;
+
+ if (mlx5dr_ste_create_next_htbl(matcher,
+ nic_matcher,
+ ste,
+ hw_ste,
+ DR_CHUNK_SIZE_1)) {
+ mlx5dr_dbg(dmn, "Failed allocating table\n");
+ goto clean_ste_info;
+ }
+
+ cur_htbl->ctrl.num_of_valid_entries++;
+
+ mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste,
+ ste_info, send_list, false);
+
+ return 0;
+
+clean_ste_info:
+ kfree(ste_info);
+clean_ste_setting:
+ list_del_init(&ste->miss_list_node);
+ mlx5dr_htbl_put(cur_htbl);
+
+ return -ENOMEM;
+}
+
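+/* Insert the hw_ste of one chain location into cur_htbl: reuse a matching
+ * entry from the miss list, fill an empty slot, rehash (grow) the table if
+ * it is overloaded, or chain a new collision entry.
+ */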
+static struct mlx5dr_ste *
+dr_rule_handle_ste_branch(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ u8 *hw_ste,
+ u8 ste_location,
+ struct mlx5dr_ste_htbl **put_htbl)
+{
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ struct mlx5dr_ste_htbl *new_htbl;
+ struct mlx5dr_ste *matched_ste;
+ struct list_head *miss_list;
+ bool skip_rehash = false;
+ struct mlx5dr_ste *ste;
+ int index;
+
+ nic_matcher = nic_rule->nic_matcher;
+ nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+again:
+ index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl);
+ miss_list = &cur_htbl->chunk->miss_list[index];
+ ste = &cur_htbl->chunk->ste_arr[index];
+
+ if (mlx5dr_ste_is_not_used(ste)) {
+ if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl,
+ ste, ste_location,
+ hw_ste, miss_list,
+ send_ste_list))
+ return NULL;
+ } else {
+ /* Hash table index in use, check if this ste is in the miss list */
+ matched_ste = dr_rule_find_ste_in_miss_list(miss_list, hw_ste);
+ if (matched_ste) {
+ /* If this is the last STE in the chain and it has the same tag,
+ * it means that all the previous STEs are the same as well;
+ * in that case this rule is a duplicate.
+ */
+ if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location))
+ return matched_ste;
+
+ mlx5dr_dbg(dmn, "Duplicate rule inserted\n");
+ }
+
+ if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) {
+ /* Hash table index in use, try to resize the hash table */
+ skip_rehash = true;
+
+ /* Hold the table until the update is done.
+ * Released in dr_rule_create_rule_nic().
+ */
+ *put_htbl = cur_htbl;
+ mlx5dr_htbl_get(cur_htbl);
+
+ new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl,
+ ste_location, send_ste_list);
+ if (!new_htbl) {
+ mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n",
+ cur_htbl->chunk->size);
+ mlx5dr_htbl_put(cur_htbl);
+ } else {
+ cur_htbl = new_htbl;
+ }
+ goto again;
+ } else {
+ /* Hash table index in use, add another collision (miss) */
+ ste = dr_rule_handle_collision(matcher,
+ nic_matcher,
+ ste,
+ hw_ste,
+ miss_list,
+ send_ste_list);
+ if (!ste) {
+ mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n",
+ index);
+ return NULL;
+ }
+ }
+ }
+ return ste;
+}
+
+static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value,
+ u32 s_idx, u32 e_idx)
+{
+ u32 i;
+
+ for (i = s_idx; i < e_idx; i++) {
+ if (value[i] & ~mask[i]) {
+ pr_info("Rule parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ struct mlx5dr_match_param *param)
+{
+ u8 match_criteria = matcher->match_criteria;
+ size_t value_size = value->match_sz;
+ u8 *mask_p = (u8 *)&matcher->mask;
+ u8 *param_p = (u8 *)param;
+ u32 s_idx, e_idx;
+
+ if (!value_size ||
+ (value_size > DR_SZ_MATCH_PARAM || (value_size % sizeof(u32)))) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule parameters length is incorrect\n");
+ return false;
+ }
+
+ mlx5dr_ste_copy_param(matcher->match_criteria, param, value, false);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_OUTER) {
+ s_idx = offsetof(struct mlx5dr_match_param, outer);
+ e_idx = min(s_idx + sizeof(param->outer), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc);
+ e_idx = min(s_idx + sizeof(param->misc), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_INNER) {
+ s_idx = offsetof(struct mlx5dr_match_param, inner);
+ e_idx = min(s_idx + sizeof(param->inner), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC2) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc2);
+ e_idx = min(s_idx + sizeof(param->misc2), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC3) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc3);
+ e_idx = min(s_idx + sizeof(param->misc3), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc4);
+ e_idx = min(s_idx + sizeof(param->misc4), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn,
+ "Rule misc4 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC5) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc5);
+ e_idx = min(s_idx + sizeof(param->misc5), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_err(matcher->tbl->dmn, "Rule misc5 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule)
+{
+ /* Check whether this nic rule was actually created, or whether it was
+ * skipped and only the other type (RX/TX) of nic rule was created.
+ */
+ if (!nic_rule->last_rule_ste)
+ return 0;
+
+ mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
+ dr_rule_clean_rule_members(rule, nic_rule);
+
+ nic_rule->nic_matcher->rules--;
+ if (!nic_rule->nic_matcher->rules)
+ mlx5dr_matcher_remove_from_tbl_nic(rule->matcher->tbl->dmn,
+ nic_rule->nic_matcher);
+
+ mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
+
+ return 0;
+}
+
+static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule)
+{
+ dr_rule_destroy_rule_nic(rule, &rule->rx);
+ dr_rule_destroy_rule_nic(rule, &rule->tx);
+ return 0;
+}
+
+static int dr_rule_destroy_rule(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ mlx5dr_dbg_rule_del(rule);
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ dr_rule_destroy_rule_nic(rule, &rule->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ dr_rule_destroy_rule_nic(rule, &rule->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ dr_rule_destroy_rule_fdb(rule);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dr_rule_remove_action_members(rule);
+ kfree(rule);
+ return 0;
+}
+
+static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec)
+{
+ if (spec->ip_version == 6 || spec->ethertype == ETH_P_IPV6)
+ return DR_RULE_IPV6;
+
+ return DR_RULE_IPV4;
+}
+
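+/* For FDB domains, skip creating the RX or TX half of the rule when the
+ * source port match or the flow_source guarantees that traffic can never
+ * hit that side (e.g. an RX rule that matches a non-uplink source port).
+ */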
+static bool dr_rule_skip(enum mlx5dr_domain_type domain,
+ enum mlx5dr_domain_nic_type nic_type,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_match_param *value,
+ u32 flow_source)
+{
+ bool rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
+
+ if (domain != MLX5DR_DOMAIN_TYPE_FDB)
+ return false;
+
+ if (mask->misc.source_port) {
+ if (rx && value->misc.source_port != MLX5_VPORT_UPLINK)
+ return true;
+
+ if (!rx && value->misc.source_port == MLX5_VPORT_UPLINK)
+ return true;
+ }
+
+ if (rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT)
+ return true;
+
+ if (!rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK)
+ return true;
+
+ return false;
+}
+
+static int
+dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_match_param *param,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info;
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ struct mlx5dr_ste_htbl *htbl = NULL;
+ struct mlx5dr_ste_htbl *cur_htbl;
+ struct mlx5dr_ste *ste = NULL;
+ LIST_HEAD(send_ste_list);
+ u8 *hw_ste_arr = NULL;
+ u32 new_hw_ste_arr_sz;
+ int ret, i;
+
+ nic_matcher = nic_rule->nic_matcher;
+ nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+ if (dr_rule_skip(dmn->type, nic_dmn->type, &matcher->mask, param,
+ rule->flow_source))
+ return 0;
+
+ hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
+ if (!hw_ste_arr)
+ return -ENOMEM;
+
+ mlx5dr_domain_nic_lock(nic_dmn);
+
+ ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher);
+ if (ret)
+ goto free_hw_ste;
+
+ ret = mlx5dr_matcher_select_builders(matcher,
+ nic_matcher,
+ dr_rule_get_ipv(&param->outer),
+ dr_rule_get_ipv(&param->inner));
+ if (ret)
+ goto remove_from_nic_tbl;
+
+ /* Set the tag values inside the ste array */
+ ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr);
+ if (ret)
+ goto remove_from_nic_tbl;
+
+ /* Set the actions values/addresses inside the ste array */
+ ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions,
+ num_actions, hw_ste_arr,
+ &new_hw_ste_arr_sz);
+ if (ret)
+ goto remove_from_nic_tbl;
+
+ cur_htbl = nic_matcher->s_htbl;
+
+ /* Go over the array of STEs and build each dr_ste accordingly.
+ * The loop covers only the builders, whose number is equal to or less
+ * than the number of STEs, since some actions may live in extra STEs.
+ */
+ for (i = 0; i < nic_matcher->num_of_builders; i++) {
+ /* Calculate CRC and keep new ste entry */
+ u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE);
+
+ ste = dr_rule_handle_ste_branch(rule,
+ nic_rule,
+ &send_ste_list,
+ cur_htbl,
+ cur_hw_ste_ent,
+ i + 1,
+ &htbl);
+ if (!ste) {
+ mlx5dr_err(dmn, "Failed creating next branch\n");
+ ret = -ENOENT;
+ goto free_rule;
+ }
+
+ cur_htbl = ste->next_htbl;
+
+ mlx5dr_ste_get(ste);
+ mlx5dr_rule_set_last_member(nic_rule, ste, true);
+ }
+
+ /* Connect actions */
+ ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list,
+ ste, hw_ste_arr, new_hw_ste_arr_sz);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed apply actions\n");
+ goto free_rule;
+ }
+ ret = dr_rule_send_update_list(&send_ste_list, dmn, true);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed sending ste!\n");
+ goto free_rule;
+ }
+
+ if (htbl)
+ mlx5dr_htbl_put(htbl);
+
+ nic_matcher->rules++;
+
+ mlx5dr_domain_nic_unlock(nic_dmn);
+
+ kfree(hw_ste_arr);
+
+ return 0;
+
+free_rule:
+ dr_rule_clean_rule_members(rule, nic_rule);
+ /* Clean all ste_info's */
+ list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) {
+ list_del(&ste_info->send_list);
+ kfree(ste_info);
+ }
+
+remove_from_nic_tbl:
+ if (!nic_matcher->rules)
+ mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher);
+
+free_hw_ste:
+ mlx5dr_domain_nic_unlock(nic_dmn);
+ kfree(hw_ste_arr);
+ return ret;
+}
+
+static int
+dr_rule_create_rule_fdb(struct mlx5dr_rule *rule,
+ struct mlx5dr_match_param *param,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_match_param copy_param = {};
+ int ret;
+
+ /* Copy match_param since it will be consumed during the first
+ * nic_rule insertion.
+ */
+ memcpy(&copy_param, param, sizeof(struct mlx5dr_match_param));
+
+ ret = dr_rule_create_rule_nic(rule, &rule->rx, param,
+ num_actions, actions);
+ if (ret)
+ return ret;
+
+ ret = dr_rule_create_rule_nic(rule, &rule->tx, &copy_param,
+ num_actions, actions);
+ if (ret)
+ goto destroy_rule_nic_rx;
+
+ return 0;
+
+destroy_rule_nic_rx:
+ dr_rule_destroy_rule_nic(rule, &rule->rx);
+ return ret;
+}
+
+static struct mlx5dr_rule *
+dr_rule_create_rule(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ size_t num_actions,
+ struct mlx5dr_action *actions[],
+ u32 flow_source)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_match_param param = {};
+ struct mlx5dr_rule *rule;
+ int ret;
+
+ if (!dr_rule_verify(matcher, value, &param))
+ return NULL;
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return NULL;
+
+ rule->matcher = matcher;
+ rule->flow_source = flow_source;
+ INIT_LIST_HEAD(&rule->rule_actions_list);
+
+ ret = dr_rule_add_action_members(rule, num_actions, actions);
+ if (ret)
+ goto free_rule;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ rule->rx.nic_matcher = &matcher->rx;
+ ret = dr_rule_create_rule_nic(rule, &rule->rx, &param,
+ num_actions, actions);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ rule->tx.nic_matcher = &matcher->tx;
+ ret = dr_rule_create_rule_nic(rule, &rule->tx, &param,
+ num_actions, actions);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ rule->rx.nic_matcher = &matcher->rx;
+ rule->tx.nic_matcher = &matcher->tx;
+ ret = dr_rule_create_rule_fdb(rule, &param,
+ num_actions, actions);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ goto remove_action_members;
+
+ INIT_LIST_HEAD(&rule->dbg_node);
+ mlx5dr_dbg_rule_add(rule);
+ return rule;
+
+remove_action_members:
+ dr_rule_remove_action_members(rule);
+free_rule:
+ kfree(rule);
+ mlx5dr_err(dmn, "Failed creating rule\n");
+ return NULL;
+}
+
+struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ size_t num_actions,
+ struct mlx5dr_action *actions[],
+ u32 flow_source)
+{
+ struct mlx5dr_rule *rule;
+
+ refcount_inc(&matcher->refcount);
+
+ rule = dr_rule_create_rule(matcher, value, num_actions, actions, flow_source);
+ if (!rule)
+ refcount_dec(&matcher->refcount);
+
+ return rule;
+}
+
+int mlx5dr_rule_destroy(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ int ret;
+
+ ret = dr_rule_destroy_rule(rule);
+ if (!ret)
+ refcount_dec(&matcher->refcount);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
new file mode 100644
index 000000000..ef19a66f5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/smp.h>
+#include "dr_types.h"
+
+#define QUEUE_SIZE 128
+#define SIGNAL_PER_DIV_QUEUE 16
+#define TH_NUMS_TO_DRAIN 2
+
+enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+
+struct dr_data_seg {
+ u64 addr;
+ u32 length;
+ u32 lkey;
+ unsigned int send_flags;
+};
+
+struct postsend_info {
+ struct dr_data_seg write;
+ struct dr_data_seg read;
+ u64 remote_addr;
+ u32 rkey;
+};
+
+struct dr_qp_rtr_attr {
+ struct mlx5dr_cmd_gid_attr dgid_attr;
+ enum ib_mtu mtu;
+ u32 qp_num;
+ u16 port_num;
+ u8 min_rnr_timer;
+ u8 sgid_index;
+ u16 udp_src_port;
+ u8 fl:1;
+};
+
+struct dr_qp_rts_attr {
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+};
+
+struct dr_qp_init_attr {
+ u32 cqn;
+ u32 pdn;
+ u32 max_send_wr;
+ struct mlx5_uars_page *uar;
+ u8 isolate_vl_tc:1;
+};
+
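+/* Parse a single CQE: on success or a requester error, advance the SQ
+ * consumer counter past the WQE identified by wqe_counter; on a responder
+ * error only bump the counter. Only the success case returns CQ_OK.
+ */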
+static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
+{
+ unsigned int idx;
+ u8 opcode;
+
+ opcode = get_cqe_opcode(cqe64);
+ if (opcode == MLX5_CQE_REQ_ERR) {
+ idx = be16_to_cpu(cqe64->wqe_counter) &
+ (dr_cq->qp->sq.wqe_cnt - 1);
+ dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
+ } else if (opcode == MLX5_CQE_RESP_ERR) {
+ ++dr_cq->qp->sq.cc;
+ } else {
+ idx = be16_to_cpu(cqe64->wqe_counter) &
+ (dr_cq->qp->sq.wqe_cnt - 1);
+ dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
+
+ return CQ_OK;
+ }
+
+ return CQ_POLL_ERR;
+}
+
+static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
+{
+ struct mlx5_cqe64 *cqe64;
+ int err;
+
+ cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
+ if (!cqe64)
+ return CQ_EMPTY;
+
+ mlx5_cqwq_pop(&dr_cq->wq);
+ err = dr_parse_cqe(dr_cq, cqe64);
+ mlx5_cqwq_update_db_record(&dr_cq->wq);
+
+ return err;
+}
+
+static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
+{
+ int npolled;
+ int err = 0;
+
+ for (npolled = 0; npolled < ne; ++npolled) {
+ err = dr_cq_poll_one(dr_cq);
+ if (err != CQ_OK)
+ break;
+ }
+
+ return err == CQ_POLL_ERR ? err : npolled;
+}
+
+static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
+ struct dr_qp_init_attr *attr)
+{
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
+ struct mlx5_wq_param wqp;
+ struct mlx5dr_qp *dr_qp;
+ int inlen;
+ void *qpc;
+ void *in;
+ int err;
+
+ dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
+ if (!dr_qp)
+ return NULL;
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ dr_qp->rq.pc = 0;
+ dr_qp->rq.cc = 0;
+ dr_qp->rq.wqe_cnt = 4;
+ dr_qp->sq.pc = 0;
+ dr_qp->sq.cc = 0;
+ dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
+
+ MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
+ MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+ err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
+ &dr_qp->wq_ctrl);
+ if (err) {
+ mlx5_core_warn(mdev, "Can't create QP WQ\n");
+ goto err_wq;
+ }
+
+ dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
+ sizeof(dr_qp->sq.wqe_head[0]),
+ GFP_KERNEL);
+
+ if (!dr_qp->sq.wqe_head) {
+ mlx5_core_warn(mdev, "Can't allocate wqe head\n");
+ goto err_wqe_head;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+ dr_qp->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_in;
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
+ MLX5_SET(qpc, qpc, pd, attr->pdn);
+ MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
+ MLX5_SET(qpc, qpc, log_page_size,
+ dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, fre, 1);
+ MLX5_SET(qpc, qpc, rlky, 1);
+ MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
+ MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
+ MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+ MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+ mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in,
+ in, pas));
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+ kvfree(in);
+ if (err)
+ goto err_in;
+ dr_qp->uar = attr->uar;
+
+ return dr_qp;
+
+err_in:
+ kfree(dr_qp->sq.wqe_head);
+err_wqe_head:
+ mlx5_wq_destroy(&dr_qp->wq_ctrl);
+err_wq:
+ kfree(dr_qp);
+ return NULL;
+}
+
+static void dr_destroy_qp(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
+ mlx5_cmd_exec_in(mdev, destroy_qp, in);
+
+ kfree(dr_qp->sq.wqe_head);
+ mlx5_wq_destroy(&dr_qp->wq_ctrl);
+ kfree(dr_qp);
+}
+
+static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
+{
+ dma_wmb();
+ *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);
+
+ /* After the wmb() the HW is aware of the new work */
+ wmb();
+
+ mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
+}
+
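+/* Build one RDMA WQE out of a control segment, a remote address segment and
+ * a single data segment, and ring the doorbell when notify_hw is set.
+ */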
+static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
+ u32 rkey, struct dr_data_seg *data_seg,
+ u32 opcode, bool notify_hw)
+{
+ struct mlx5_wqe_raddr_seg *wq_raddr;
+ struct mlx5_wqe_ctrl_seg *wq_ctrl;
+ struct mlx5_wqe_data_seg *wq_dseg;
+ unsigned int size;
+ unsigned int idx;
+
+ size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
+ sizeof(*wq_raddr) / 16;
+
+ idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+
+ wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
+ wq_ctrl->imm = 0;
+ wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0;
+ wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
+ opcode);
+ wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
+ wq_raddr = (void *)(wq_ctrl + 1);
+ wq_raddr->raddr = cpu_to_be64(remote_addr);
+ wq_raddr->rkey = cpu_to_be32(rkey);
+ wq_raddr->reserved = 0;
+
+ wq_dseg = (void *)(wq_raddr + 1);
+ wq_dseg->byte_count = cpu_to_be32(data_seg->length);
+ wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
+ wq_dseg->addr = cpu_to_be64(data_seg->addr);
+
+ dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
+
+ if (notify_hw)
+ dr_cmd_notify_hw(dr_qp, wq_ctrl);
+}
+
+static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
+{
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->read, MLX5_OPCODE_RDMA_READ, true);
+}
+
+/**
+ * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
+ * with the send_list parameters:
+ *
+ * @ste: The STE that this data is attached to
+ * @size: Size of the data to write
+ * @offset: Offset of the data from the start of the hw_ste entry
+ * @data: The data itself
+ * @ste_info: The ste_info to be sent with send_list
+ * @send_list: The list to append ste_info to
+ * @copy_data: If true, the data is copied and kept, since it is not
+ * backed up anywhere else (e.g. during re-hash).
+ * If false, the data may still be updated after it
+ * was added to the list.
+ */
+void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
+ u16 offset, u8 *data,
+ struct mlx5dr_ste_send_info *ste_info,
+ struct list_head *send_list,
+ bool copy_data)
+{
+ ste_info->size = size;
+ ste_info->ste = ste;
+ ste_info->offset = offset;
+
+ if (copy_data) {
+ memcpy(ste_info->data_cont, data, size);
+ ste_info->data = ste_info->data_cont;
+ } else {
+ ste_info->data = data;
+ }
+
+ list_add_tail(&ste_info->send_list, send_list);
+}
+
+/* The function tries to consume one WC each time, unless the queue is full.
+ * In that case, which means that the HW is behind the SW by a full queue
+ * length, the function drains the CQ until it is empty.
+ */
+static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring)
+{
+ bool is_drain = false;
+ int ne;
+
+ if (send_ring->pending_wqe < send_ring->signal_th)
+ return 0;
+
+ /* Queue is full, start draining it */
+ if (send_ring->pending_wqe >=
+ dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
+ is_drain = true;
+
+ do {
+ ne = dr_poll_cq(send_ring->cq, 1);
+ if (unlikely(ne < 0)) {
+ mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
+ send_ring->qp->qpn);
+ send_ring->err_state = true;
+ return ne;
+ } else if (ne == 1) {
+ send_ring->pending_wqe -= send_ring->signal_th;
+ }
+ } while (is_drain && send_ring->pending_wqe);
+
+ return 0;
+}
+
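+/* Each post-send is a pair of WQEs: an RDMA WRITE of the data followed by
+ * an RDMA READ of the same length back into the same local buffer (using
+ * the send ring's mkey). A completion is requested once every signal_th
+ * pending WQEs.
+ */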
+static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
+{
+ send_ring->pending_wqe++;
+
+ if (send_ring->pending_wqe % send_ring->signal_th == 0)
+ send_info->write.send_flags |= IB_SEND_SIGNALED;
+
+ send_ring->pending_wqe++;
+ send_info->read.length = send_info->write.length;
+ /* Read into the same write area */
+ send_info->read.addr = (uintptr_t)send_info->write.addr;
+ send_info->read.lkey = send_ring->mr->mkey;
+
+ if (send_ring->pending_wqe % send_ring->signal_th == 0)
+ send_info->read.send_flags = IB_SEND_SIGNALED;
+ else
+ send_info->read.send_flags = 0;
+}
+
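+/* Post a single WRITE+READ pair on the send QP. Payloads larger than the
+ * domain's max inline size are first staged in the registered send-ring
+ * buffer and sent from there.
+ */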
+static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
+ struct postsend_info *send_info)
+{
+ struct mlx5dr_send_ring *send_ring = dmn->send_ring;
+ u32 buff_offset;
+ int ret;
+
+ if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+ send_ring->err_state)) {
+ mlx5_core_dbg_once(dmn->mdev,
+ "Skipping post send: QP err state: %d, device state: %d\n",
+ send_ring->err_state, dmn->mdev->state);
+ return 0;
+ }
+
+ spin_lock(&send_ring->lock);
+
+ ret = dr_handle_pending_wc(dmn, send_ring);
+ if (ret)
+ goto out_unlock;
+
+ if (send_info->write.length > dmn->info.max_inline_size) {
+ buff_offset = (send_ring->tx_head &
+ (dmn->send_ring->signal_th - 1)) *
+ send_ring->max_post_send_size;
+ /* Copy to ring mr */
+ memcpy(send_ring->buf + buff_offset,
+ (void *)(uintptr_t)send_info->write.addr,
+ send_info->write.length);
+ send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
+ send_info->write.lkey = send_ring->mr->mkey;
+ }
+
+ send_ring->tx_head++;
+ dr_fill_data_segs(send_ring, send_info);
+ dr_post_send(send_ring->qp, send_info);
+
+out_unlock:
+ spin_unlock(&send_ring->lock);
+ return ret;
+}
+
+static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 **data,
+ u32 *byte_size,
+ int *iterations,
+ int *num_stes)
+{
+ u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
+ int alloc_size;
+
+ if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
+ *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
+ *byte_size = dmn->send_ring->max_post_send_size;
+ alloc_size = *byte_size;
+ *num_stes = *byte_size / DR_STE_SIZE;
+ } else {
+ *iterations = 1;
+ *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
+ alloc_size = *num_stes * DR_STE_SIZE;
+ }
+
+ *data = kvzalloc(alloc_size, GFP_KERNEL);
+ if (!*data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * mlx5dr_send_postsend_ste: write size bytes at offset into the HW ICM.
+ *
+ * @dmn: Domain
+ * @ste: The STE struct that contains the data (at
+ * least part of it)
+ * @data: The real data to send
+ * @size: Number of bytes to write
+ * @offset: The offset from the start of the ICM-mapped
+ * data at which to start writing; used to write
+ * only part of the buffer.
+ *
+ * Return: 0 on success.
+ */
+int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
+ u8 *data, u16 size, u16 offset)
+{
+ struct postsend_info send_info = {};
+
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = size;
+ send_info.write.lkey = 0;
+ send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
+
+ return dr_postsend_icm_data(dmn, &send_info);
+}
+
+int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste, u8 *mask)
+{
+ u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
+ int num_stes_per_iter;
+ int iterations;
+ u8 *data;
+ int ret;
+ int i;
+ int j;
+
+ ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
+ &iterations, &num_stes_per_iter);
+ if (ret)
+ return ret;
+
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);
+
+ /* Send the data 'iterations' times */
+ for (i = 0; i < iterations; i++) {
+ u32 ste_index = i * (byte_size / DR_STE_SIZE);
+ struct postsend_info send_info = {};
+
+ /* Copy all STEs into the data buffer;
+ * the bit_mask needs to be added as well.
+ */
+ for (j = 0; j < num_stes_per_iter; j++) {
+ struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
+ u32 ste_off = j * DR_STE_SIZE;
+
+ if (mlx5dr_ste_is_not_used(ste)) {
+ memcpy(data + ste_off,
+ formatted_ste, DR_STE_SIZE);
+ } else {
+ /* Copy data */
+ memcpy(data + ste_off,
+ htbl->chunk->hw_ste_arr +
+ DR_STE_SIZE_REDUCED * (ste_index + j),
+ DR_STE_SIZE_REDUCED);
+ /* Copy bit_mask */
+ memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
+ mask, DR_STE_SIZE_MASK);
+ /* Only when we have a mask do we need to re-arrange the STE */
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
+ data + (j * DR_STE_SIZE),
+ DR_STE_SIZE);
+ }
+ }
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = byte_size;
+ send_info.write.lkey = 0;
+ send_info.remote_addr =
+ mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ goto out_free;
+ }
+
+out_free:
+ kvfree(data);
+ return ret;
+}
+
+/* Initialize htbl with default STEs */
+int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *ste_init_data,
+ bool update_hw_ste)
+{
+ u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
+ int iterations;
+ int num_stes;
+ u8 *copy_dst;
+ u8 *data;
+ int ret;
+ int i;
+
+ ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
+ &iterations, &num_stes);
+ if (ret)
+ return ret;
+
+ if (update_hw_ste) {
+ /* Copy the reduced STE to hash table ste_arr */
+ for (i = 0; i < num_stes; i++) {
+ copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
+ memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
+ }
+ }
+
+ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
+
+ /* Copy the same STE throughout the data buffer */
+ for (i = 0; i < num_stes; i++) {
+ copy_dst = data + i * DR_STE_SIZE;
+ memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
+ }
+
+ /* Send the data 'iterations' times */
+ for (i = 0; i < iterations; i++) {
+ u8 ste_index = i * (byte_size / DR_STE_SIZE);
+ struct postsend_info send_info = {};
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = byte_size;
+ send_info.write.lkey = 0;
+ send_info.remote_addr =
+ mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ goto out_free;
+ }
+
+out_free:
+ kvfree(data);
+ return ret;
+}
+
+int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *action)
+{
+ struct postsend_info send_info = {};
+ int ret;
+
+ send_info.write.addr = (uintptr_t)action->rewrite->data;
+ send_info.write.length = action->rewrite->num_of_actions *
+ DR_MODIFY_ACTION_SIZE;
+ send_info.write.lkey = 0;
+ send_info.remote_addr =
+ mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
+ send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+
+ return ret;
+}
+
+static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp,
+ int port)
+{
+ u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
+ MLX5_SET(qpc, qpc, rre, 1);
+ MLX5_SET(qpc, qpc, rwe, 1);
+
+ MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+ MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
+
+ return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
+}
+
+static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp,
+ struct dr_qp_rts_attr *attr)
+{
+ u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
+
+ MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
+
+ MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
+ MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
+ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
+
+ MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+ MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
+
+ return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
+}
+
+static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp,
+ struct dr_qp_rtr_attr *attr)
+{
+ u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
+
+ MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
+
+ MLX5_SET(qpc, qpc, mtu, attr->mtu);
+ MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
+ MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
+ memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+ attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
+ memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+ attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
+ MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+ attr->sgid_index);
+
+ if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
+ MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+ attr->udp_src_port);
+
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
+ MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
+ MLX5_SET(qpc, qpc, min_rnr_nak, 1);
+
+ MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+ MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
+
+ return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
+}
+
+static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
+{
+ /* Check whether RC RoCE QP creation with force loopback is allowed.
+ * There are two separate capability bits for this:
+ * - force loopback when RoCE is enabled
+ * - force loopback when RoCE is disabled
+ */
+ return ((caps->roce_caps.roce_en &&
+ caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
+ (!caps->roce_caps.roce_en &&
+ caps->roce_caps.fl_rc_qp_when_roce_disabled));
+}
+
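+/* Move the send QP through the RC state machine (RST -> INIT -> RTR -> RTS).
+ * When force loopback is supported no GID is needed; otherwise the GID at
+ * index 0 is queried and used.
+ */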
+static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
+ struct dr_qp_rts_attr rts_attr = {};
+ struct dr_qp_rtr_attr rtr_attr = {};
+ enum ib_mtu mtu = IB_MTU_1024;
+ u16 gid_index = 0;
+ int port = 1;
+ int ret;
+
+ /* Init */
+ ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify QP rst2init\n");
+ return ret;
+ }
+
+ /* RTR */
+ rtr_attr.mtu = mtu;
+ rtr_attr.qp_num = dr_qp->qpn;
+ rtr_attr.min_rnr_timer = 12;
+ rtr_attr.port_num = port;
+ rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;
+
+ /* If QP creation with force loopback is allowed, then there
+ * is no need for GID index when creating the QP.
+ * Otherwise we query GID attributes and use GID index.
+ */
+ rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
+ if (!rtr_attr.fl) {
+ ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
+ &rtr_attr.dgid_attr);
+ if (ret)
+ return ret;
+
+ rtr_attr.sgid_index = gid_index;
+ }
+
+ ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
+ return ret;
+ }
+
+ /* RTS */
+ rts_attr.timeout = 14;
+ rts_attr.retry_cnt = 7;
+ rts_attr.rnr_retry = 7;
+
+ ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void dr_cq_complete(struct mlx5_core_cq *mcq,
+ struct mlx5_eqe *eqe)
+{
+ pr_err("CQ completion CQ: #%u\n", mcq->cqn);
+}
+
+static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
+ struct mlx5_uars_page *uar,
+ size_t ncqe)
+{
+ u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_wq_param wqp;
+ struct mlx5_cqe64 *cqe;
+ struct mlx5dr_cq *cq;
+ int inlen, err, eqn;
+ void *cqc, *in;
+ __be64 *pas;
+ int vector;
+ u32 i;
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return NULL;
+
+ ncqe = roundup_pow_of_two(ncqe);
+ MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
+ &cq->wq_ctrl);
+ if (err)
+ goto out;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+ cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ goto err_cqwq;
+
+ vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
+ err = mlx5_vector2eqn(mdev, vector, &eqn);
+ if (err) {
+ kvfree(in);
+ goto err_cqwq;
+ }
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
+
+ cq->mcq.comp = dr_cq_complete;
+
+ err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
+ kvfree(in);
+
+ if (err)
+ goto err_cqwq;
+
+ cq->mcq.cqe_sz = 64;
+ cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
+ cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
+ *cq->mcq.set_ci_db = 0;
+
+ /* Set a non-zero value, in order to prevent the HW from running
+ * doorbell recovery on a CQ that is used in polling mode.
+ */
+ *cq->mcq.arm_db = cpu_to_be32(2 << 28);
+
+ cq->mcq.vector = 0;
+ cq->mcq.uar = uar;
+
+ return cq;
+
+err_cqwq:
+ mlx5_wq_destroy(&cq->wq_ctrl);
+out:
+ kfree(cq);
+ return NULL;
+}
+
+static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
+{
+ mlx5_core_destroy_cq(mdev, &cq->mcq);
+ mlx5_wq_destroy(&cq->wq_ctrl);
+ kfree(cq);
+}
+
+static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
+{
+ u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
+}
+
+static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
+ u32 pdn, void *buf, size_t size)
+{
+ struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ struct device *dma_device;
+ dma_addr_t dma_addr;
+ int err;
+
+ if (!mr)
+ return NULL;
+
+ dma_device = mlx5_core_dma_dev(mdev);
+ dma_addr = dma_map_single(dma_device, buf, size,
+ DMA_BIDIRECTIONAL);
+ err = dma_mapping_error(dma_device, dma_addr);
+ if (err) {
+ mlx5_core_warn(mdev, "Can't dma buf\n");
+ kfree(mr);
+ return NULL;
+ }
+
+ err = dr_create_mkey(mdev, pdn, &mr->mkey);
+ if (err) {
+ mlx5_core_warn(mdev, "Can't create mkey\n");
+ dma_unmap_single(dma_device, dma_addr, size,
+ DMA_BIDIRECTIONAL);
+ kfree(mr);
+ return NULL;
+ }
+
+ mr->dma_addr = dma_addr;
+ mr->size = size;
+ mr->addr = buf;
+
+ return mr;
+}
+
+static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
+{
+ mlx5_core_destroy_mkey(mdev, mr->mkey);
+ dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
+ DMA_BIDIRECTIONAL);
+ kfree(mr);
+}
+
+int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
+{
+ struct dr_qp_init_attr init_attr = {};
+ int cq_size;
+ int size;
+ int ret;
+
+ dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
+ if (!dmn->send_ring)
+ return -ENOMEM;
+
+ cq_size = QUEUE_SIZE + 1;
+ dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
+ if (!dmn->send_ring->cq) {
+ mlx5dr_err(dmn, "Failed creating CQ\n");
+ ret = -ENOMEM;
+ goto free_send_ring;
+ }
+
+ init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
+ init_attr.pdn = dmn->pdn;
+ init_attr.uar = dmn->uar;
+ init_attr.max_send_wr = QUEUE_SIZE;
+
+ /* Isolated VL is applicable only if force loopback is supported */
+ if (dr_send_allow_fl(&dmn->info.caps))
+ init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
+
+ spin_lock_init(&dmn->send_ring->lock);
+
+ dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
+ if (!dmn->send_ring->qp) {
+ mlx5dr_err(dmn, "Failed creating QP\n");
+ ret = -ENOMEM;
+ goto clean_cq;
+ }
+
+ dmn->send_ring->cq->qp = dmn->send_ring->qp;
+
+ dmn->info.max_send_wr = QUEUE_SIZE;
+ dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
+ DR_STE_SIZE);
+
+ dmn->send_ring->signal_th = dmn->info.max_send_wr /
+ SIGNAL_PER_DIV_QUEUE;
+
+ /* Prepare qp to be used */
+ ret = dr_prepare_qp_to_rts(dmn);
+ if (ret)
+ goto clean_qp;
+
+ dmn->send_ring->max_post_send_size =
+ mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
+ DR_ICM_TYPE_STE);
+
+ /* Allocating the max size as a buffer for writing */
+ size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
+ dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
+ if (!dmn->send_ring->buf) {
+ ret = -ENOMEM;
+ goto clean_qp;
+ }
+
+ dmn->send_ring->buf_size = size;
+
+ dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
+ dmn->pdn, dmn->send_ring->buf, size);
+ if (!dmn->send_ring->mr) {
+ ret = -ENOMEM;
+ goto free_mem;
+ }
+
+ dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
+ dmn->pdn, dmn->send_ring->sync_buff,
+ MIN_READ_SYNC);
+ if (!dmn->send_ring->sync_mr) {
+ ret = -ENOMEM;
+ goto clean_mr;
+ }
+
+ return 0;
+
+clean_mr:
+ dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
+free_mem:
+ kfree(dmn->send_ring->buf);
+clean_qp:
+ dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
+clean_cq:
+ dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
+free_send_ring:
+ kfree(dmn->send_ring);
+
+ return ret;
+}
+
+void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring)
+{
+ dr_destroy_qp(dmn->mdev, send_ring->qp);
+ dr_destroy_cq(dmn->mdev, send_ring->cq);
+ dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
+ dr_dereg_mr(dmn->mdev, send_ring->mr);
+ kfree(send_ring->buf);
+ kfree(send_ring);
+}
+
+int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_send_ring *send_ring = dmn->send_ring;
+ struct postsend_info send_info = {};
+ u8 data[DR_STE_SIZE];
+ int num_of_sends_req;
+ int ret;
+ int i;
+
+ /* Sending this number of requests guarantees that the queue is drained */
+ num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
+
+ /* Send fake requests forcing the last to be signaled */
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = DR_STE_SIZE;
+ send_info.write.lkey = 0;
+ /* Using the sync_mr in order to write/read */
+ send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
+ send_info.rkey = send_ring->sync_mr->mkey;
+
+ for (i = 0; i < num_of_sends_req; i++) {
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ return ret;
+ }
+
+ spin_lock(&send_ring->lock);
+ ret = dr_handle_pending_wc(dmn, send_ring);
+ spin_unlock(&send_ring->lock);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
new file mode 100644
index 000000000..7815a629d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -0,0 +1,1390 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/types.h>
+#include <linux/crc32.h>
+#include "dr_ste.h"
+
+struct dr_hw_ste_format {
+ u8 ctrl[DR_STE_SIZE_CTRL];
+ u8 tag[DR_STE_SIZE_TAG];
+ u8 mask[DR_STE_SIZE_MASK];
+};
+
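+/* Compute the CRC32 of the input buffer and return the result with its
+ * byte order reversed.
+ */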
+static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
+{
+ u32 crc = crc32(0, input_data, length);
+
+ return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
+ ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
+}
+
+bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->sw_format_ver > MLX5_STEERING_FORMAT_CONNECTX_5;
+}
+
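+/* Hash an STE into an index within its hash table: mask the tag with the
+ * table's byte mask, CRC32 the masked tag and take the result modulo the
+ * table size (a power of two).
+ */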
+u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
+{
+ u32 num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ u8 masked[DR_STE_SIZE_TAG] = {};
+ u32 crc32, index;
+ u16 bit;
+ int i;
+
+ /* Don't calculate the CRC if the result is predictable */
+ if (num_entries == 1 || htbl->byte_mask == 0)
+ return 0;
+
+ /* Mask tag using byte mask, bit per byte */
+ bit = 1 << (DR_STE_SIZE_TAG - 1);
+ for (i = 0; i < DR_STE_SIZE_TAG; i++) {
+ if (htbl->byte_mask & bit)
+ masked[i] = hw_ste->tag[i];
+
+ bit = bit >> 1;
+ }
+
+ crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG);
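+ /* num_entries is a power of two, so masking the CRC yields the bucket index */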
+ index = crc32 & (num_entries - 1);
+
+ return index;
+}
+
+u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask)
+{
+ u16 byte_mask = 0;
+ int i;
+
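+ /* Each fully-masked tag byte (0xff) sets one bit in the resulting byte mask */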
+ for (i = 0; i < DR_STE_SIZE_MASK; i++) {
+ byte_mask = byte_mask << 1;
+ if (bit_mask[i] == 0xff)
+ byte_mask |= 1;
+ }
+ return byte_mask;
+}
+
+static u8 *dr_ste_get_tag(u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+
+ return hw_ste->tag;
+}
+
+void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+
+ memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK);
+}
+
+static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste)
+{
+ memset(&hw_ste->tag, 0, sizeof(hw_ste->tag));
+ memset(&hw_ste->mask, 0, sizeof(hw_ste->mask));
+}
+
+static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste)
+{
+ hw_ste->tag[0] = 0xdc;
+ hw_ste->mask[0] = 0;
+}
+
+void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste_p, u64 miss_addr)
+{
+ ste_ctx->set_miss_addr(hw_ste_p, miss_addr);
+}
+
+static void dr_ste_always_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 miss_addr)
+{
+ ste_ctx->set_next_lu_type(hw_ste, MLX5DR_STE_LU_TYPE_DONT_CARE);
+ ste_ctx->set_miss_addr(hw_ste, miss_addr);
+ dr_ste_set_always_miss((struct dr_hw_ste_format *)hw_ste);
+}
+
+void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 icm_addr, u32 ht_size)
+{
+ ste_ctx->set_hit_addr(hw_ste, icm_addr, ht_size);
+}
+
+u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste)
+{
+ u64 base_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(ste->htbl->chunk);
+ u32 index = ste - ste->htbl->chunk->ste_arr;
+
+ return base_icm_addr + DR_STE_SIZE * index;
+}
+
+u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste)
+{
+ u32 index = ste - ste->htbl->chunk->ste_arr;
+
+ return mlx5dr_icm_pool_get_chunk_mr_addr(ste->htbl->chunk) + DR_STE_SIZE * index;
+}
+
+u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste)
+{
+ u64 index = ste - ste->htbl->chunk->ste_arr;
+
+ return ste->htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * index;
+}
+
+struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste)
+{
+ u32 index = ste - ste->htbl->chunk->ste_arr;
+
+ return &ste->htbl->chunk->miss_list[index];
+}
+
+static void dr_ste_always_hit_htbl(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste,
+ struct mlx5dr_ste_htbl *next_htbl)
+{
+ struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
+
+ ste_ctx->set_byte_mask(hw_ste, next_htbl->byte_mask);
+ ste_ctx->set_next_lu_type(hw_ste, next_htbl->lu_type);
+ ste_ctx->set_hit_addr(hw_ste, mlx5dr_icm_pool_get_chunk_icm_addr(chunk),
+ mlx5dr_icm_pool_get_chunk_num_of_entries(chunk));
+
+ dr_ste_set_always_hit((struct dr_hw_ste_format *)hw_ste);
+}
+
+bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 ste_location)
+{
+ return ste_location == nic_matcher->num_of_builders;
+}
+
+/* Replace relevant fields, except for:
+ * htbl - keep the origin htbl
+ * miss_list + list - the src was already taken from the list
+ * icm_addr/mr_addr - they depend on the hosting table
+ *
+ * Before:
+ * | a | -> | b | -> | c | ->
+ *
+ * After:
+ * | a | -> | c | ->
+ * while the data that was in b is copied into a.
+ */
+static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
+{
+ memcpy(mlx5dr_ste_get_hw_ste(dst), mlx5dr_ste_get_hw_ste(src),
+ DR_STE_SIZE_REDUCED);
+ dst->next_htbl = src->next_htbl;
+ if (dst->next_htbl)
+ dst->next_htbl->pointing_ste = dst;
+
+ dst->refcount = src->refcount;
+}
+
+/* Free ste which is the head and the only one in miss_list */
+static void
+dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste_send_info *ste_info_head,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *stats_tbl)
+{
+ u8 tmp_data_ste[DR_STE_SIZE] = {};
+ u64 miss_addr;
+
+ miss_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+
+ /* Use a temporary ste because dr_ste_always_miss_addr
+ * touches the bit_mask area, which doesn't exist in ste->hw_ste.
+ * A full-sized (DR_STE_SIZE) hw_ste is needed.
+ */
+ memcpy(tmp_data_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED);
+ dr_ste_always_miss_addr(ste_ctx, tmp_data_ste, miss_addr);
+ memcpy(mlx5dr_ste_get_hw_ste(ste), tmp_data_ste, DR_STE_SIZE_REDUCED);
+
+ list_del_init(&ste->miss_list_node);
+
+ /* Write the full STE size so the entry becomes an "always_miss" */
+ mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE,
+ 0, tmp_data_ste,
+ ste_info_head,
+ send_ste_list,
+ true /* Copy data */);
+
+ stats_tbl->ctrl.num_of_valid_entries--;
+}
+
+/* Free ste which is the head but NOT the only one in miss_list:
+ * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0
+ */
+static void
+dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ struct mlx5dr_ste *next_ste,
+ struct mlx5dr_ste_send_info *ste_info_head,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *stats_tbl)
+{
+ struct mlx5dr_ste_htbl *next_miss_htbl;
+ u8 hw_ste[DR_STE_SIZE] = {};
+ int sb_idx;
+
+ next_miss_htbl = next_ste->htbl;
+
+ /* Remove next_ste from the miss_list before the copy */
+ list_del_init(&next_ste->miss_list_node);
+
+ /* Move data from next into ste */
+ dr_ste_replace(ste, next_ste);
+
+ /* Update the rule on STE change */
+ mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false);
+
+ /* Rebuild the full 64-byte STE: copy the reduced STE and restore its bit mask */
+ memcpy(hw_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED);
+ sb_idx = ste->ste_chain_location - 1;
+ mlx5dr_ste_set_bit_mask(hw_ste,
+ nic_matcher->ste_builder[sb_idx].bit_mask);
+
+ /* Release the htbl that contained next_ste.
+ * The origin htbl stays with the same number of entries.
+ */
+ mlx5dr_htbl_put(next_miss_htbl);
+
+ mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE,
+ 0, hw_ste,
+ ste_info_head,
+ send_ste_list,
+ true /* Copy data */);
+
+ stats_tbl->ctrl.num_of_collisions--;
+ stats_tbl->ctrl.num_of_valid_entries--;
+}
+
+/* Free ste that is located in the middle of the miss list:
+ * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_|
+ */
+static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste *ste,
+ struct mlx5dr_ste_send_info *ste_info,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *stats_tbl)
+{
+ struct mlx5dr_ste *prev_ste;
+ u64 miss_addr;
+
+ prev_ste = list_prev_entry(ste, miss_list_node);
+ if (WARN_ON(!prev_ste))
+ return;
+
+ miss_addr = ste_ctx->get_miss_addr(mlx5dr_ste_get_hw_ste(ste));
+ ste_ctx->set_miss_addr(mlx5dr_ste_get_hw_ste(prev_ste), miss_addr);
+
+ mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0,
+ mlx5dr_ste_get_hw_ste(prev_ste),
+ ste_info, send_ste_list,
+ true /* Copy data*/);
+
+ list_del_init(&ste->miss_list_node);
+
+ stats_tbl->ctrl.num_of_valid_entries--;
+ stats_tbl->ctrl.num_of_collisions--;
+}
+
+void mlx5dr_ste_free(struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
+ struct mlx5dr_ste_send_info ste_info_head;
+ struct mlx5dr_ste *next_ste, *first_ste;
+ bool put_on_origin_table = true;
+ struct mlx5dr_ste_htbl *stats_tbl;
+ LIST_HEAD(send_ste_list);
+
+ first_ste = list_first_entry(mlx5dr_ste_get_miss_list(ste),
+ struct mlx5dr_ste, miss_list_node);
+ stats_tbl = first_ste->htbl;
+
+ /* Two options:
+ * 1. ste is head:
+ * a. head ste is the only ste in the miss list
+ * b. head ste is not the only ste in the miss-list
+ * 2. ste is not head
+ */
+ if (first_ste == ste) { /* Ste is the head */
+ struct mlx5dr_ste *last_ste;
+
+ last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste),
+ struct mlx5dr_ste, miss_list_node);
+ if (last_ste == first_ste)
+ next_ste = NULL;
+ else
+ next_ste = list_next_entry(ste, miss_list_node);
+
+ if (!next_ste) {
+ /* One and only entry in the list */
+ dr_ste_remove_head_ste(ste_ctx, ste,
+ nic_matcher,
+ &ste_info_head,
+ &send_ste_list,
+ stats_tbl);
+ } else {
+ /* First but not only entry in the list */
+ dr_ste_replace_head_ste(nic_matcher, ste,
+ next_ste, &ste_info_head,
+ &send_ste_list, stats_tbl);
+ put_on_origin_table = false;
+ }
+ } else { /* Ste in the middle of the list */
+ dr_ste_remove_middle_ste(ste_ctx, ste,
+ &ste_info_head, &send_ste_list,
+ stats_tbl);
+ }
+
+ /* Update HW */
+ list_for_each_entry_safe(cur_ste_info, tmp_ste_info,
+ &send_ste_list, send_list) {
+ list_del(&cur_ste_info->send_list);
+ mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste,
+ cur_ste_info->data, cur_ste_info->size,
+ cur_ste_info->offset);
+ }
+
+ if (put_on_origin_table)
+ mlx5dr_htbl_put(ste->htbl);
+}
+
+bool mlx5dr_ste_equal_tag(void *src, void *dst)
+{
+ struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src;
+ struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst;
+
+ return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG);
+}
+
+void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste,
+ struct mlx5dr_ste_htbl *next_htbl)
+{
+ u64 icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(next_htbl->chunk);
+ u32 num_entries =
+ mlx5dr_icm_pool_get_chunk_num_of_entries(next_htbl->chunk);
+
+ ste_ctx->set_hit_addr(hw_ste, icm_addr, num_entries);
+}
+
+void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste_p, u32 ste_size)
+{
+ if (ste_ctx->prepare_for_postsend)
+ ste_ctx->prepare_for_postsend(hw_ste_p, ste_size);
+}
+
+/* Init one ste as a pattern for ste data array */
+void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ u16 gvmi,
+ enum mlx5dr_domain_nic_type nic_type,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste,
+ struct mlx5dr_htbl_connect_info *connect_info)
+{
+ bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
+ u8 tmp_hw_ste[DR_STE_SIZE] = {0};
+
+ ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi);
+
+ /* Use a temporary ste because dr_ste_always_miss_addr/hit_htbl
+ * touch the bit_mask area, which doesn't exist in ste->hw_ste.
+ * A full-sized (DR_STE_SIZE) hw_ste is needed.
+ */
+ memcpy(tmp_hw_ste, formatted_ste, DR_STE_SIZE_REDUCED);
+ if (connect_info->type == CONNECT_HIT)
+ dr_ste_always_hit_htbl(ste_ctx, tmp_hw_ste,
+ connect_info->hit_next_htbl);
+ else
+ dr_ste_always_miss_addr(ste_ctx, tmp_hw_ste,
+ connect_info->miss_icm_addr);
+ memcpy(formatted_ste, tmp_hw_ste, DR_STE_SIZE_REDUCED);
+}
+
+int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain_rx_tx *nic_dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ struct mlx5dr_htbl_connect_info *connect_info,
+ bool update_hw_ste)
+{
+ u8 formatted_ste[DR_STE_SIZE] = {};
+
+ mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
+ dmn->info.caps.gvmi,
+ nic_dmn->type,
+ htbl,
+ formatted_ste,
+ connect_info);
+
+ return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste);
+}
+
+int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ u8 *cur_hw_ste,
+ enum mlx5dr_icm_chunk_size log_table_size)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *next_htbl;
+
+ if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) {
+ u16 next_lu_type;
+ u16 byte_mask;
+
+ next_lu_type = ste_ctx->get_next_lu_type(cur_hw_ste);
+ byte_mask = ste_ctx->get_byte_mask(cur_hw_ste);
+
+ next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ log_table_size,
+ next_lu_type,
+ byte_mask);
+ if (!next_htbl) {
+ mlx5dr_dbg(dmn, "Failed allocating table\n");
+ return -ENOMEM;
+ }
+
+ /* Write new table to HW */
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr =
+ mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+ if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl,
+ &info, false)) {
+ mlx5dr_info(dmn, "Failed writing table to HW\n");
+ goto free_table;
+ }
+
+ mlx5dr_ste_set_hit_addr_by_next_htbl(ste_ctx,
+ cur_hw_ste, next_htbl);
+ ste->next_htbl = next_htbl;
+ next_htbl->pointing_ste = ste;
+ }
+
+ return 0;
+
+free_table:
+ mlx5dr_ste_htbl_free(next_htbl);
+ return -ENOENT;
+}
+
+struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ u16 lu_type, u16 byte_mask)
+{
+ struct mlx5dr_icm_chunk *chunk;
+ struct mlx5dr_ste_htbl *htbl;
+ u32 num_entries;
+ int i;
+
+ htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
+ if (!htbl)
+ return NULL;
+
+ chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size);
+ if (!chunk)
+ goto out_free_htbl;
+
+ htbl->chunk = chunk;
+ htbl->lu_type = lu_type;
+ htbl->byte_mask = byte_mask;
+ htbl->refcount = 0;
+ num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk);
+
+ for (i = 0; i < num_entries; i++) {
+ struct mlx5dr_ste *ste = &chunk->ste_arr[i];
+
+ ste->htbl = htbl;
+ ste->refcount = 0;
+ INIT_LIST_HEAD(&ste->miss_list_node);
+ INIT_LIST_HEAD(&chunk->miss_list[i]);
+ }
+
+ return htbl;
+
+out_free_htbl:
+ kfree(htbl);
+ return NULL;
+}
+
+int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl)
+{
+ if (htbl->refcount)
+ return -EBUSY;
+
+ mlx5dr_icm_free_chunk(htbl->chunk);
+ kfree(htbl);
+ return 0;
+}
+
+void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps,
+ hw_ste_arr, attr, added_stes);
+}
+
+void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps,
+ hw_ste_arr, attr, added_stes);
+}
+
+const struct mlx5dr_ste_action_modify_field *
+mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field)
+{
+ const struct mlx5dr_ste_action_modify_field *hw_field;
+
+ if (sw_field >= ste_ctx->modify_field_arr_sz)
+ return NULL;
+
+ hw_field = &ste_ctx->modify_field_arr[sw_field];
+ if (!hw_field->end && !hw_field->start)
+ return NULL;
+
+ return hw_field;
+}
+
+void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ ste_ctx->set_action_set((u8 *)hw_action,
+ hw_field, shifter, length, data);
+}
+
+void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ ste_ctx->set_action_add((u8 *)hw_action,
+ hw_field, shifter, length, data);
+}
+
+void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter)
+{
+ ste_ctx->set_action_copy((u8 *)hw_action,
+ dst_hw_field, dst_shifter, dst_len,
+ src_hw_field, src_shifter);
+}
+
+int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
+ void *data, u32 data_sz,
+ u8 *hw_action, u32 hw_action_sz,
+ u16 *used_hw_action_num)
+{
+ /* Only Ethernet frame is supported, with VLAN (18) or without (14) */
+ if (data_sz != HDR_LEN_L2 && data_sz != HDR_LEN_L2_W_VLAN)
+ return -EINVAL;
+
+ return ste_ctx->set_action_decap_l3_list(data, data_sz,
+ hw_action, hw_action_sz,
+ used_hw_action_num);
+}
+
+static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn,
+ struct mlx5dr_match_spec *spec)
+{
+ if (spec->ip_version) {
+ if (spec->ip_version != 0xf) {
+ mlx5dr_err(dmn,
+ "Partial ip_version mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+ } else if (spec->ethertype != 0xffff &&
+ (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) {
+ mlx5dr_err(dmn,
+ "Partial/no ethertype mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+ u8 match_criteria,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_match_param *value)
+{
+ if (value)
+ return 0;
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+ if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
+ mlx5dr_err(dmn,
+ "Partial mask source_port is not supported\n");
+ return -EINVAL;
+ }
+ if (mask->misc.source_eswitch_owner_vhca_id &&
+ mask->misc.source_eswitch_owner_vhca_id != 0xffff) {
+ mlx5dr_err(dmn,
+ "Partial mask source_eswitch_owner_vhca_id is not supported\n");
+ return -EINVAL;
+ }
+ }
+
+ if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->outer))
+ return -EINVAL;
+
+ if ((match_criteria & DR_MATCHER_CRITERIA_INNER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->inner))
+ return -EINVAL;
+
+ return 0;
+}
+
+int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_match_param *value,
+ u8 *ste_arr)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ bool is_rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
+ struct mlx5dr_ste_build *sb;
+ int ret, i;
+
+ ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
+ &matcher->mask, value);
+ if (ret)
+ return ret;
+
+ sb = nic_matcher->ste_builder;
+ for (i = 0; i < nic_matcher->num_of_builders; i++) {
+ ste_ctx->ste_init(ste_arr,
+ sb->lu_type,
+ is_rx,
+ dmn->info.caps.gvmi);
+
+ mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask);
+
+ ret = sb->ste_build_tag_func(value, sb, dr_ste_get_tag(ste_arr));
+ if (ret)
+ return ret;
+
+ /* Connect the STEs */
+ if (i < (nic_matcher->num_of_builders - 1)) {
+ /* Need the next builder for these fields,
+ * not relevant for the last ste in the chain.
+ */
+ sb++;
+ ste_ctx->set_next_lu_type(ste_arr, sb->lu_type);
+ ste_ctx->set_byte_mask(ste_arr, sb->byte_mask);
+ }
+ ste_arr += DR_STE_SIZE;
+ }
+ return 0;
+}
+
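+/* Read a field from the mask and optionally clear it, so consumed fields can be tracked */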
+#define IFC_GET_CLR(typ, p, fld, clear) ({ \
+ void *__p = (p); \
+ u32 __t = MLX5_GET(typ, __p, fld); \
+ if (clear) \
+ MLX5_SET(typ, __p, fld, 0); \
+ __t; \
+})
+
+#define memcpy_and_clear(to, from, len, clear) ({ \
+ void *__to = (to), *__from = (from); \
+ size_t __len = (len); \
+ memcpy(__to, __from, __len); \
+ if (clear) \
+ memset(__from, 0, __len); \
+})
+
+static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bool clr)
+{
+ spec->gre_c_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_c_present, clr);
+ spec->gre_k_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_k_present, clr);
+ spec->gre_s_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_s_present, clr);
+ spec->source_vhca_port = IFC_GET_CLR(fte_match_set_misc, mask, source_vhca_port, clr);
+ spec->source_sqn = IFC_GET_CLR(fte_match_set_misc, mask, source_sqn, clr);
+
+ spec->source_port = IFC_GET_CLR(fte_match_set_misc, mask, source_port, clr);
+ spec->source_eswitch_owner_vhca_id =
+ IFC_GET_CLR(fte_match_set_misc, mask, source_eswitch_owner_vhca_id, clr);
+
+ spec->outer_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_prio, clr);
+ spec->outer_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cfi, clr);
+ spec->outer_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_vid, clr);
+ spec->inner_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_prio, clr);
+ spec->inner_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cfi, clr);
+ spec->inner_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_vid, clr);
+
+ spec->outer_second_cvlan_tag =
+ IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cvlan_tag, clr);
+ spec->inner_second_cvlan_tag =
+ IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cvlan_tag, clr);
+ spec->outer_second_svlan_tag =
+ IFC_GET_CLR(fte_match_set_misc, mask, outer_second_svlan_tag, clr);
+ spec->inner_second_svlan_tag =
+ IFC_GET_CLR(fte_match_set_misc, mask, inner_second_svlan_tag, clr);
+ spec->gre_protocol = IFC_GET_CLR(fte_match_set_misc, mask, gre_protocol, clr);
+
+ spec->gre_key_h = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.hi, clr);
+ spec->gre_key_l = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.lo, clr);
+
+ spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr);
+
+ spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr);
+ spec->geneve_tlv_option_0_exist =
+ IFC_GET_CLR(fte_match_set_misc, mask, geneve_tlv_option_0_exist, clr);
+ spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr);
+
+ spec->outer_ipv6_flow_label =
+ IFC_GET_CLR(fte_match_set_misc, mask, outer_ipv6_flow_label, clr);
+
+ spec->inner_ipv6_flow_label =
+ IFC_GET_CLR(fte_match_set_misc, mask, inner_ipv6_flow_label, clr);
+
+ spec->geneve_opt_len = IFC_GET_CLR(fte_match_set_misc, mask, geneve_opt_len, clr);
+ spec->geneve_protocol_type =
+ IFC_GET_CLR(fte_match_set_misc, mask, geneve_protocol_type, clr);
+
+ spec->bth_dst_qp = IFC_GET_CLR(fte_match_set_misc, mask, bth_dst_qp, clr);
+}
+
+static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bool clr)
+{
+ __be32 raw_ip[4];
+
+ spec->smac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_47_16, clr);
+
+ spec->smac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_15_0, clr);
+ spec->ethertype = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ethertype, clr);
+
+ spec->dmac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_47_16, clr);
+
+ spec->dmac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_15_0, clr);
+ spec->first_prio = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_prio, clr);
+ spec->first_cfi = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_cfi, clr);
+ spec->first_vid = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_vid, clr);
+
+ spec->ip_protocol = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_protocol, clr);
+ spec->ip_dscp = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_dscp, clr);
+ spec->ip_ecn = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_ecn, clr);
+ spec->cvlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, cvlan_tag, clr);
+ spec->svlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, svlan_tag, clr);
+ spec->frag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, frag, clr);
+ spec->ip_version = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_version, clr);
+ spec->tcp_flags = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_flags, clr);
+ spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr);
+ spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr);
+
+ spec->ipv4_ihl = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ipv4_ihl, clr);
+ spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr);
+
+ spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr);
+ spec->udp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_dport, clr);
+
+ memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(raw_ip), clr);
+
+ spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]);
+ spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]);
+ spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]);
+ spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]);
+
+ memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(raw_ip), clr);
+
+ spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]);
+ spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]);
+ spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]);
+ spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]);
+}
+
+static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec, bool clr)
+{
+ spec->outer_first_mpls_label =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_label, clr);
+ spec->outer_first_mpls_exp =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp, clr);
+ spec->outer_first_mpls_s_bos =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos, clr);
+ spec->outer_first_mpls_ttl =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl, clr);
+ spec->inner_first_mpls_label =
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_label, clr);
+ spec->inner_first_mpls_exp =
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp, clr);
+ spec->inner_first_mpls_s_bos =
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos, clr);
+ spec->inner_first_mpls_ttl =
+ IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl, clr);
+ spec->outer_first_mpls_over_gre_label =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label, clr);
+ spec->outer_first_mpls_over_gre_exp =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp, clr);
+ spec->outer_first_mpls_over_gre_s_bos =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos, clr);
+ spec->outer_first_mpls_over_gre_ttl =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl, clr);
+ spec->outer_first_mpls_over_udp_label =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label, clr);
+ spec->outer_first_mpls_over_udp_exp =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp, clr);
+ spec->outer_first_mpls_over_udp_s_bos =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos, clr);
+ spec->outer_first_mpls_over_udp_ttl =
+ IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl, clr);
+ spec->metadata_reg_c_7 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_7, clr);
+ spec->metadata_reg_c_6 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_6, clr);
+ spec->metadata_reg_c_5 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_5, clr);
+ spec->metadata_reg_c_4 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_4, clr);
+ spec->metadata_reg_c_3 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_3, clr);
+ spec->metadata_reg_c_2 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_2, clr);
+ spec->metadata_reg_c_1 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_1, clr);
+ spec->metadata_reg_c_0 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_0, clr);
+ spec->metadata_reg_a = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_a, clr);
+}
+
+static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec, bool clr)
+{
+ spec->inner_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_seq_num, clr);
+ spec->outer_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_seq_num, clr);
+ spec->inner_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_ack_num, clr);
+ spec->outer_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_ack_num, clr);
+ spec->outer_vxlan_gpe_vni =
+ IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_vni, clr);
+ spec->outer_vxlan_gpe_next_protocol =
+ IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol, clr);
+ spec->outer_vxlan_gpe_flags =
+ IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_flags, clr);
+ spec->icmpv4_header_data = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_header_data, clr);
+ spec->icmpv6_header_data =
+ IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_header_data, clr);
+ spec->icmpv4_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_type, clr);
+ spec->icmpv4_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_code, clr);
+ spec->icmpv6_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_type, clr);
+ spec->icmpv6_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_code, clr);
+ spec->geneve_tlv_option_0_data =
+ IFC_GET_CLR(fte_match_set_misc3, mask, geneve_tlv_option_0_data, clr);
+ spec->gtpu_teid = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_teid, clr);
+ spec->gtpu_msg_flags = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_flags, clr);
+ spec->gtpu_msg_type = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_type, clr);
+ spec->gtpu_dw_0 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_0, clr);
+ spec->gtpu_dw_2 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_2, clr);
+ spec->gtpu_first_ext_dw_0 =
+ IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_first_ext_dw_0, clr);
+}
+
+static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec, bool clr)
+{
+ spec->prog_sample_field_id_0 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_0, clr);
+ spec->prog_sample_field_value_0 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_0, clr);
+ spec->prog_sample_field_id_1 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_1, clr);
+ spec->prog_sample_field_value_1 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_1, clr);
+ spec->prog_sample_field_id_2 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_2, clr);
+ spec->prog_sample_field_value_2 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_2, clr);
+ spec->prog_sample_field_id_3 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_3, clr);
+ spec->prog_sample_field_value_3 =
+ IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr);
+}
+
+static void dr_ste_copy_mask_misc5(char *mask, struct mlx5dr_match_misc5 *spec, bool clr)
+{
+ spec->macsec_tag_0 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_0, clr);
+ spec->macsec_tag_1 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_1, clr);
+ spec->macsec_tag_2 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_2, clr);
+ spec->macsec_tag_3 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_3, clr);
+ spec->tunnel_header_0 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_0, clr);
+ spec->tunnel_header_1 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_1, clr);
+ spec->tunnel_header_2 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_2, clr);
+ spec->tunnel_header_3 =
+ IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_3, clr);
+}
+
+void mlx5dr_ste_copy_param(u8 match_criteria,
+ struct mlx5dr_match_param *set_param,
+ struct mlx5dr_match_parameters *mask,
+ bool clr)
+{
+ u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {};
+ u8 *data = (u8 *)mask->match_buf;
+ size_t param_location;
+ void *buff;
+
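+ /* If the supplied mask is shorter than a match struct, copy the
+ * remainder into a zero-padded scratch buffer before parsing it.
+ */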
+ if (match_criteria & DR_MATCHER_CRITERIA_OUTER) {
+ if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) {
+ memcpy(tail_param, data, mask->match_sz);
+ buff = tail_param;
+ } else {
+ buff = mask->match_buf;
+ }
+ dr_ste_copy_mask_spec(buff, &set_param->outer, clr);
+ }
+ param_location = sizeof(struct mlx5dr_match_spec);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc(buff, &set_param->misc, clr);
+ }
+ param_location += sizeof(struct mlx5dr_match_misc);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_INNER) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_spec)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_spec(buff, &set_param->inner, clr);
+ }
+ param_location += sizeof(struct mlx5dr_match_spec);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC2) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc2)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc2(buff, &set_param->misc2, clr);
+ }
+
+ param_location += sizeof(struct mlx5dr_match_misc2);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC3) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc3)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc3(buff, &set_param->misc3, clr);
+ }
+
+ param_location += sizeof(struct mlx5dr_match_misc3);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc4)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr);
+ }
+
+ param_location += sizeof(struct mlx5dr_match_misc4);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC5) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc5)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc5(buff, &set_param->misc5, clr);
+ }
+}
+
+void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l2_src_dst_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l3_ipv6_dst_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l3_ipv6_src_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l3_ipv4_5_tuple_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l2_src_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l2_dst_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask, bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l2_tnl_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l3_ipv4_misc_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_ipv6_l3_l4_init(sb, mask);
+}
+
+static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ return 0;
+}
+
+void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx)
+{
+ sb->rx = rx;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE;
+ sb->byte_mask = 0;
+ sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag;
+}
+
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_mpls_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_gre_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->caps = caps;
+ return ste_ctx->build_tnl_mpls_over_gre_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->caps = caps;
+ return ste_ctx->build_tnl_mpls_over_udp_init(sb, mask);
+}
+
+void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->caps = caps;
+ ste_ctx->build_icmp_init(sb, mask);
+}
+
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_general_purpose_init(sb, mask);
+}
+
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_eth_l4_misc_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_vxlan_gpe_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_geneve_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ if (!ste_ctx->build_tnl_geneve_tlv_opt_exist_init)
+ return;
+
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_geneve_tlv_opt_exist_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_gtpu_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_gtpu_flex_parser_0_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ ste_ctx->build_tnl_gtpu_flex_parser_1_init(sb, mask);
+}
+
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_register_0_init(sb, mask);
+}
+
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_register_1_init(sb, mask);
+}
+
+void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn,
+ bool inner, bool rx)
+{
+ /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */
+ sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id;
+
+ sb->rx = rx;
+ sb->dmn = dmn;
+ sb->inner = inner;
+ ste_ctx->build_src_gvmi_qpn_init(sb, mask);
+}
+
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_flex_parser_0_init(sb, mask);
+}
+
+void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_flex_parser_1_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ sb->rx = rx;
+ sb->inner = inner;
+ ste_ctx->build_tnl_header_0_1_init(sb, mask);
+}
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version)
+{
+ if (version == MLX5_STEERING_FORMAT_CONNECTX_5)
+ return mlx5dr_ste_get_ctx_v0();
+ else if (version == MLX5_STEERING_FORMAT_CONNECTX_6DX)
+ return mlx5dr_ste_get_ctx_v1();
+ else if (version == MLX5_STEERING_FORMAT_CONNECTX_7)
+ return mlx5dr_ste_get_ctx_v2();
+
+ return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
new file mode 100644
index 000000000..17513baff
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#ifndef _DR_STE_
+#define _DR_STE_
+
+#include "dr_types.h"
+
+#define STE_IPV4 0x1
+#define STE_IPV6 0x2
+#define STE_TCP 0x1
+#define STE_UDP 0x2
+#define STE_SPI 0x3
+#define IP_VERSION_IPV4 0x4
+#define IP_VERSION_IPV6 0x6
+#define STE_SVLAN 0x1
+#define STE_CVLAN 0x2
+#define HDR_LEN_L2_MACS 0xC
+#define HDR_LEN_L2_VLAN 0x4
+#define HDR_LEN_L2_ETHER 0x2
+#define HDR_LEN_L2 (HDR_LEN_L2_MACS + HDR_LEN_L2_ETHER)
+#define HDR_LEN_L2_W_VLAN (HDR_LEN_L2 + HDR_LEN_L2_VLAN)
+
+/* Set a specific value into the STE tag field and mark the spec field as consumed */
+#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \
+ if ((spec)->s_fname) { \
+ MLX5_SET(ste_##lookup_type, tag, t_fname, value); \
+ (spec)->s_fname = 0; \
+ } \
+} while (0)
+
+/* Copy spec->s_fname into tag->t_fname and mark spec->s_fname as used */
+#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \
+ DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname)
+
+/* Set tag->t_fname to all ones (-1) and mark spec->s_fname as used */
+#define DR_STE_SET_ONES(lookup_type, tag, t_fname, spec, s_fname) \
+ DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, -1)
+
+#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \
+} while (0)
+
+#define DR_STE_SET_MPLS(lookup_type, mask, in_out, tag) do { \
+ struct mlx5dr_match_misc2 *_mask = mask; \
+ u8 *_tag = tag; \
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_label, _mask, \
+ in_out##_first_mpls_label);\
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_s_bos, _mask, \
+ in_out##_first_mpls_s_bos); \
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_exp, _mask, \
+ in_out##_first_mpls_exp); \
+ DR_STE_SET_TAG(lookup_type, _tag, mpls0_ttl, _mask, \
+ in_out##_first_mpls_ttl); \
+} while (0)
+
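+/* Write a flex parser value into the tag at the offset derived from its
+ * parser id, then mark the spec field as consumed.
+ */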
+#define DR_STE_SET_FLEX_PARSER_FIELD(tag, fname, caps, spec) do { \
+ u8 parser_id = (caps)->flex_parser_id_##fname; \
+ u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); \
+ *(__be32 *)parser_ptr = cpu_to_be32((spec)->fname);\
+ (spec)->fname = 0;\
+} while (0)
+
+#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_gre_label || \
+ (_misc)->outer_first_mpls_over_gre_exp || \
+ (_misc)->outer_first_mpls_over_gre_s_bos || \
+ (_misc)->outer_first_mpls_over_gre_ttl)
+
+#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_udp_label || \
+ (_misc)->outer_first_mpls_over_udp_exp || \
+ (_misc)->outer_first_mpls_over_udp_s_bos || \
+ (_misc)->outer_first_mpls_over_udp_ttl)
+
+enum dr_ste_action_modify_type_l3 {
+ DR_STE_ACTION_MDFY_TYPE_L3_NONE = 0x0,
+ DR_STE_ACTION_MDFY_TYPE_L3_IPV4 = 0x1,
+ DR_STE_ACTION_MDFY_TYPE_L3_IPV6 = 0x2,
+};
+
+enum dr_ste_action_modify_type_l4 {
+ DR_STE_ACTION_MDFY_TYPE_L4_NONE = 0x0,
+ DR_STE_ACTION_MDFY_TYPE_L4_TCP = 0x1,
+ DR_STE_ACTION_MDFY_TYPE_L4_UDP = 0x2,
+};
+
+enum {
+ HDR_MPLS_OFFSET_LABEL = 12,
+ HDR_MPLS_OFFSET_EXP = 9,
+ HDR_MPLS_OFFSET_S_BOS = 8,
+ HDR_MPLS_OFFSET_TTL = 0,
+};
+
+u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask);
+
+static inline u8 *
+dr_ste_calc_flex_parser_offset(u8 *tag, u8 parser_id)
+{
+ /* Calculate tag byte offset based on flex parser id */
+ return tag + 4 * (3 - (parser_id % 4));
+}
+
+#define DR_STE_CTX_BUILDER(fname) \
+ ((*build_##fname##_init)(struct mlx5dr_ste_build *sb, \
+ struct mlx5dr_match_param *mask))
+
+struct mlx5dr_ste_ctx {
+ /* Builders */
+ void DR_STE_CTX_BUILDER(eth_l2_src_dst);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv6_src);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv6_dst);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv4_5_tuple);
+ void DR_STE_CTX_BUILDER(eth_l2_src);
+ void DR_STE_CTX_BUILDER(eth_l2_dst);
+ void DR_STE_CTX_BUILDER(eth_l2_tnl);
+ void DR_STE_CTX_BUILDER(eth_l3_ipv4_misc);
+ void DR_STE_CTX_BUILDER(eth_ipv6_l3_l4);
+ void DR_STE_CTX_BUILDER(mpls);
+ void DR_STE_CTX_BUILDER(tnl_gre);
+ void DR_STE_CTX_BUILDER(tnl_mpls);
+ void DR_STE_CTX_BUILDER(tnl_mpls_over_gre);
+ void DR_STE_CTX_BUILDER(tnl_mpls_over_udp);
+ void DR_STE_CTX_BUILDER(icmp);
+ void DR_STE_CTX_BUILDER(general_purpose);
+ void DR_STE_CTX_BUILDER(eth_l4_misc);
+ void DR_STE_CTX_BUILDER(tnl_vxlan_gpe);
+ void DR_STE_CTX_BUILDER(tnl_geneve);
+ void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt);
+ void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt_exist);
+ void DR_STE_CTX_BUILDER(register_0);
+ void DR_STE_CTX_BUILDER(register_1);
+ void DR_STE_CTX_BUILDER(src_gvmi_qpn);
+ void DR_STE_CTX_BUILDER(flex_parser_0);
+ void DR_STE_CTX_BUILDER(flex_parser_1);
+ void DR_STE_CTX_BUILDER(tnl_gtpu);
+ void DR_STE_CTX_BUILDER(tnl_header_0_1);
+ void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0);
+ void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1);
+
+ /* Getters and Setters */
+ void (*ste_init)(u8 *hw_ste_p, u16 lu_type,
+ bool is_rx, u16 gvmi);
+ void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type);
+ u16 (*get_next_lu_type)(u8 *hw_ste_p);
+ void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr);
+ u64 (*get_miss_addr)(u8 *hw_ste_p);
+ void (*set_hit_addr)(u8 *hw_ste_p, u64 icm_addr, u32 ht_size);
+ void (*set_byte_mask)(u8 *hw_ste_p, u16 byte_mask);
+ u16 (*get_byte_mask)(u8 *hw_ste_p);
+
+ /* Actions */
+ u32 actions_caps;
+ void (*set_actions_rx)(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+ void (*set_actions_tx)(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *hw_ste_arr,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+ u32 modify_field_arr_sz;
+ const struct mlx5dr_ste_action_modify_field *modify_field_arr;
+ void (*set_action_set)(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+ void (*set_action_add)(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+ void (*set_action_copy)(u8 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter);
+ int (*set_action_decap_l3_list)(void *data,
+ u32 data_sz,
+ u8 *hw_action,
+ u32 hw_action_sz,
+ u16 *used_hw_action_num);
+
+ /* Send */
+ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size);
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void);
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void);
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void);
+
+#endif /* _DR_STE_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
new file mode 100644
index 000000000..2010d4ac6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -0,0 +1,1960 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#include <linux/types.h>
+#include <linux/crc32.h>
+#include "dr_ste.h"
+
+#define SVLAN_ETHERTYPE 0x88a8
+#define DR_STE_ENABLE_FLOW_TAG BIT(31)
+
+enum dr_ste_v0_entry_type {
+ DR_STE_TYPE_TX = 1,
+ DR_STE_TYPE_RX = 2,
+ DR_STE_TYPE_MODIFY_PKT = 6,
+};
+
+enum dr_ste_v0_action_tunl {
+ DR_STE_TUNL_ACTION_NONE = 0,
+ DR_STE_TUNL_ACTION_ENABLE = 1,
+ DR_STE_TUNL_ACTION_DECAP = 2,
+ DR_STE_TUNL_ACTION_L3_DECAP = 3,
+ DR_STE_TUNL_ACTION_POP_VLAN = 4,
+};
+
+enum dr_ste_v0_action_type {
+ DR_STE_ACTION_TYPE_PUSH_VLAN = 1,
+ DR_STE_ACTION_TYPE_ENCAP_L3 = 3,
+ DR_STE_ACTION_TYPE_ENCAP = 4,
+};
+
+enum dr_ste_v0_action_mdfy_op {
+ DR_STE_ACTION_MDFY_OP_COPY = 0x1,
+ DR_STE_ACTION_MDFY_OP_SET = 0x2,
+ DR_STE_ACTION_MDFY_OP_ADD = 0x3,
+};
+
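+/* Select the _I, _D or _O lookup type variant based on the inner/rx flags */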
+#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \
+ ((inner) ? DR_STE_V0_LU_TYPE_##lookup_type##_I : \
+ (rx) ? DR_STE_V0_LU_TYPE_##lookup_type##_D : \
+ DR_STE_V0_LU_TYPE_##lookup_type##_O)
+
+enum {
+ DR_STE_V0_LU_TYPE_NOP = 0x00,
+ DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP = 0x05,
+ DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I = 0x0a,
+ DR_STE_V0_LU_TYPE_ETHL2_DST_O = 0x06,
+ DR_STE_V0_LU_TYPE_ETHL2_DST_I = 0x07,
+ DR_STE_V0_LU_TYPE_ETHL2_DST_D = 0x1b,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_O = 0x08,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_I = 0x09,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_D = 0x1c,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_O = 0x36,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_I = 0x37,
+ DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_D = 0x38,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a,
+ DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b,
+ DR_STE_V0_LU_TYPE_ETHL4_O = 0x13,
+ DR_STE_V0_LU_TYPE_ETHL4_I = 0x14,
+ DR_STE_V0_LU_TYPE_ETHL4_D = 0x21,
+ DR_STE_V0_LU_TYPE_ETHL4_MISC_O = 0x2c,
+ DR_STE_V0_LU_TYPE_ETHL4_MISC_I = 0x2d,
+ DR_STE_V0_LU_TYPE_ETHL4_MISC_D = 0x2e,
+ DR_STE_V0_LU_TYPE_MPLS_FIRST_O = 0x15,
+ DR_STE_V0_LU_TYPE_MPLS_FIRST_I = 0x24,
+ DR_STE_V0_LU_TYPE_MPLS_FIRST_D = 0x25,
+ DR_STE_V0_LU_TYPE_GRE = 0x16,
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0 = 0x22,
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 = 0x23,
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19,
+ DR_STE_V0_LU_TYPE_GENERAL_PURPOSE = 0x18,
+ DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0 = 0x2f,
+ DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1 = 0x30,
+ DR_STE_V0_LU_TYPE_TUNNEL_HEADER = 0x34,
+ DR_STE_V0_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE,
+};
+
+enum {
+ DR_STE_V0_ACTION_MDFY_FLD_L2_0 = 0,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_1 = 1,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_2 = 2,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_0 = 3,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_1 = 4,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_2 = 5,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_3 = 6,
+ DR_STE_V0_ACTION_MDFY_FLD_L3_4 = 7,
+ DR_STE_V0_ACTION_MDFY_FLD_L4_0 = 8,
+ DR_STE_V0_ACTION_MDFY_FLD_L4_1 = 9,
+ DR_STE_V0_ACTION_MDFY_FLD_MPLS = 10,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_0 = 11,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_0 = 12,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_1 = 13,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_2 = 14,
+ DR_STE_V0_ACTION_MDFY_FLD_REG_3 = 15,
+ DR_STE_V0_ACTION_MDFY_FLD_L4_2 = 16,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_0 = 17,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_1 = 18,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_2 = 19,
+ DR_STE_V0_ACTION_MDFY_FLD_FLEX_3 = 20,
+ DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_1 = 21,
+ DR_STE_V0_ACTION_MDFY_FLD_METADATA = 22,
+ DR_STE_V0_ACTION_MDFY_FLD_RESERVED = 23,
+};
+
+static const struct mlx5dr_ste_action_modify_field dr_ste_v0_action_modify_field_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 16, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 32, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 16, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 0, .end = 5,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 48, .end = 56,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 0, .end = 15,
+ },
+};
+
+static void dr_ste_v0_set_entry_type(u8 *hw_ste_p, u8 entry_type)
+{
+ MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
+}
+
+static u8 dr_ste_v0_get_entry_type(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_general, hw_ste_p, entry_type);
+}
+
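+/* The miss address is an ICM address in 64-byte granularity: the low 6 bits
+ * are dropped and bits [39:6] are split across the miss_address_31_6 and
+ * miss_address_39_32 fields.
+ */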
+static void dr_ste_v0_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+{
+ u64 index = miss_addr >> 6;
+
+ /* The miss address for TX and RX STEs is located at the same offsets */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index);
+}
+
+static u64 dr_ste_v0_get_miss_addr(u8 *hw_ste_p)
+{
+ u64 index =
+ ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6) |
+ ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32)) << 26);
+
+ return index << 6;
+}
+
+static void dr_ste_v0_set_byte_mask(u8 *hw_ste_p, u16 byte_mask)
+{
+ MLX5_SET(ste_general, hw_ste_p, byte_mask, byte_mask);
+}
+
+static u16 dr_ste_v0_get_byte_mask(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_general, hw_ste_p, byte_mask);
+}
+
+static void dr_ste_v0_set_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type);
+}
+
+static void dr_ste_v0_set_next_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_general, hw_ste_p, next_lu_type, lu_type);
+}
+
+static u16 dr_ste_v0_get_next_lu_type(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_general, hw_ste_p, next_lu_type);
+}
+
+static void dr_ste_v0_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
+{
+ MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi);
+}
+
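+/* The hit (next table) address is an ICM address in 32-byte granularity,
+ * with the hash table size OR-ed into the low bits of the combined
+ * base/size field.
+ */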
+static void dr_ste_v0_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
+{
+ u64 index = (icm_addr >> 5) | ht_size;
+
+ MLX5_SET(ste_general, hw_ste_p, next_table_base_39_32_size, index >> 27);
+ MLX5_SET(ste_general, hw_ste_p, next_table_base_31_5_size, index);
+}
+
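+/* Initialize a blank STE: entry type (RX/TX/modify-packet), lookup type,
+ * a "don't care" next lookup type, and the owner GVMI replicated into the
+ * high bits of both the hit and the miss address.
+ */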
+static void dr_ste_v0_init_full(u8 *hw_ste_p, u16 lu_type,
+ enum dr_ste_v0_entry_type entry_type, u16 gvmi)
+{
+ dr_ste_v0_set_entry_type(hw_ste_p, entry_type);
+ dr_ste_v0_set_lu_type(hw_ste_p, lu_type);
+ dr_ste_v0_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
+
+ /* Set the GVMI once; it is the same for RX/TX.
+ * Bits 63_48 of the next table base / miss address encode the next GVMI.
+ */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi);
+}
+
+static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
+ bool is_rx, u16 gvmi)
+{
+ enum dr_ste_v0_entry_type entry_type;
+
+ entry_type = is_rx ? DR_STE_TYPE_RX : DR_STE_TYPE_TX;
+ dr_ste_v0_init_full(hw_ste_p, lu_type, entry_type, gvmi);
+}
+
+static void dr_ste_v0_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer,
+ DR_STE_ENABLE_FLOW_TAG | flow_tag);
+}
+
+static void dr_ste_v0_set_counter_id(u8 *hw_ste_p, u32 ctr_id)
+{
+ /* This can be used for both rx_steering_mult and sx_transmit */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16);
+}
+
+static void dr_ste_v0_set_go_back_bit(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1);
+}
+
+static void dr_ste_v0_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr,
+ bool go_back)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+ DR_STE_ACTION_TYPE_PUSH_VLAN);
+ MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr);
+ /* Due to a HW limitation this bit must be set, otherwise reformat +
+ * push VLAN will not work.
+ */
+ if (go_back)
+ dr_ste_v0_set_go_back_bit(hw_ste_p);
+}
+
+static void dr_ste_v0_set_tx_encap(void *hw_ste_p, u32 reformat_id,
+ int size, bool encap_l3)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+ encap_l3 ? DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP);
+ /* The hardware expects the size here in 2-byte words */
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2);
+ MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id);
+}
+
+static void dr_ste_v0_set_rx_decap(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_DECAP);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1);
+}
+
+static void dr_ste_v0_set_rx_pop_vlan(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_POP_VLAN);
+}
+
+static void dr_ste_v0_set_rx_decap_l3(u8 *hw_ste_p, bool vlan)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_L3_DECAP);
+ MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1);
+}
+
+static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
+ u32 re_write_index)
+{
+ MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions,
+ num_of_actions);
+ MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer,
+ re_write_index);
+}
+
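+/* When a single STE cannot hold all of the requested actions, chain an
+ * additional STE: bump the added_stes counter, advance last_ste by
+ * DR_STE_SIZE and initialize the new entry with a "don't care" lookup type.
+ */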
+static void dr_ste_v0_arr_init_next(u8 **last_ste,
+ u32 *added_stes,
+ enum dr_ste_v0_entry_type entry_type,
+ u16 gvmi)
+{
+ (*added_stes)++;
+ *last_ste += DR_STE_SIZE;
+ dr_ste_v0_init_full(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE,
+ entry_type, gvmi);
+}
+
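+/* Build the TX action chain: modify header first (outer headers only), then
+ * push VLAN, then encap, then the counter, and finally the hit address.
+ * Actions that cannot share the current STE chain a new one via
+ * dr_ste_v0_arr_init_next().
+ */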
+static void
+dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] ||
+ action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3];
+
+ /* Make sure the modify header action comes before L2 encapsulation,
+ * since modify header is supported for outer headers only.
+ */
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
+ dr_ste_v0_set_rewrite_actions(last_ste,
+ attr->modify_actions,
+ attr->modify_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR])
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_TX,
+ attr->gvmi);
+
+ dr_ste_v0_set_tx_push_vlan(last_ste,
+ attr->vlans.headers[i],
+ encap);
+ }
+ }
+
+ if (encap) {
+ /* Modify header and encapsulation require different STEs, since the
+ * modify header STE format doesn't support the encapsulation
+ * tunneling_action.
+ */
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] ||
+ action_type_set[DR_ACTION_TYP_PUSH_VLAN])
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_TX,
+ attr->gvmi);
+
+ dr_ste_v0_set_tx_encap(last_ste,
+ attr->reformat.id,
+ attr->reformat.size,
+ action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]);
+ /* Whenever prio_tag_required is enabled, we can be sure that the
+ * previous table (ACL) already pushed a VLAN onto our packet, and due
+ * to a HW limitation this bit must be set, otherwise push VLAN +
+ * reformat will not work.
+ */
+ if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required))
+ dr_ste_v0_set_go_back_bit(last_ste);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v0_set_counter_id(last_ste, attr->ctr_id);
+
+ dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
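+/* Build the RX action chain: counter, L3/L2 decap, pop VLAN, modify header
+ * and flow tag, ending with the hit address. An extra STE is chained whenever
+ * an action conflicts with the entry type already chosen for the current STE.
+ */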
+static void
+dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v0_set_counter_id(last_ste, attr->ctr_id);
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
+ dr_ste_v0_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
+ dr_ste_v0_set_rewrite_actions(last_ste,
+ attr->decap_actions,
+ attr->decap_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2])
+ dr_ste_v0_set_rx_decap(last_ste);
+
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (i ||
+ action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] ||
+ action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_RX,
+ attr->gvmi);
+
+ dr_ste_v0_set_rx_pop_vlan(last_ste);
+ }
+ }
+
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
+ if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_MODIFY_PKT,
+ attr->gvmi);
+ else
+ dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
+
+ dr_ste_v0_set_rewrite_actions(last_ste,
+ attr->modify_actions,
+ attr->modify_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TAG]) {
+ if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
+ dr_ste_v0_arr_init_next(&last_ste,
+ added_stes,
+ DR_STE_TYPE_RX,
+ attr->gvmi);
+
+ dr_ste_v0_rx_set_flow_tag(last_ste, attr->flow_tag);
+ }
+
+ dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
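+/* Modify-header primitives: SET and ADD write an inline value into a bit
+ * range of a hardware field, COPY moves a bit range between two fields.
+ * For SET and ADD a full 32-bit length is encoded as 0 in destination_length.
+ */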
+static void dr_ste_v0_set_action_set(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ length = (length == 32) ? 0 : length;
+ MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, length);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+}
+
+static void dr_ste_v0_set_action_add(u8 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ length = (length == 32) ? 0 : length;
+ MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_ADD);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, length);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+}
+
+static void dr_ste_v0_set_action_copy(u8 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter)
+{
+ MLX5_SET(dr_action_hw_copy, hw_action, opcode, DR_STE_ACTION_MDFY_OP_COPY);
+ MLX5_SET(dr_action_hw_copy, hw_action, destination_field_code, dst_hw_field);
+ MLX5_SET(dr_action_hw_copy, hw_action, destination_left_shifter, dst_shifter);
+ MLX5_SET(dr_action_hw_copy, hw_action, destination_length, dst_len);
+ MLX5_SET(dr_action_hw_copy, hw_action, source_field_code, src_hw_field);
+ MLX5_SET(dr_action_hw_copy, hw_action, source_left_shifter, src_shifter);
+}
+
+#define DR_STE_DECAP_L3_MIN_ACTION_NUM 5
+
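+/* L3 decap rebuilds the inner Ethernet header with a list of inline SET
+ * actions: DMAC and SMAC (two pieces each), the ethertype (or VLAN header),
+ * and one extra action when a VLAN is present, which is why at least
+ * DR_STE_DECAP_L3_MIN_ACTION_NUM actions are required.
+ */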
+static int
+dr_ste_v0_set_action_decap_l3_list(void *data, u32 data_sz,
+ u8 *hw_action, u32 hw_action_sz,
+ u16 *used_hw_action_num)
+{
+ struct mlx5_ifc_l2_hdr_bits *l2_hdr = data;
+ u32 hw_action_num;
+ int required_actions;
+ u32 hdr_fld_4b;
+ u16 hdr_fld_2b;
+ u16 vlan_type;
+ bool vlan;
+
+ vlan = (data_sz != HDR_LEN_L2);
+ hw_action_num = hw_action_sz / MLX5_ST_SZ_BYTES(dr_action_hw_set);
+ required_actions = DR_STE_DECAP_L3_MIN_ACTION_NUM + !!vlan;
+
+ if (hw_action_num < required_actions)
+ return -ENOMEM;
+
+ /* dmac_47_16 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 16);
+ hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ inline_data, hdr_fld_4b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* smac_47_16 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, 16);
+ hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 |
+ MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* dmac_15_0 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 0);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ inline_data, hdr_fld_2b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* ethertype + (optional) vlan */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 32);
+ if (!vlan) {
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, 16);
+ } else {
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+ vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? DR_STE_SVLAN : DR_STE_CVLAN;
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan);
+ hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b;
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b);
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length, 18);
+ }
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ /* smac_15_0 */
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 0);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0);
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b);
+ hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set);
+
+ if (vlan) {
+ MLX5_SET(dr_action_hw_set, hw_action,
+ opcode, DR_STE_ACTION_MDFY_OP_SET);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ inline_data, hdr_fld_2b);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2);
+ MLX5_SET(dr_action_hw_set, hw_action,
+ destination_left_shifter, 0);
+ }
+
+ *used_hw_action_num = required_actions;
+
+ return 0;
+}
+
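+/* Matcher builders: each _init() converts the match mask into the STE bit
+ * mask, derives the lookup type and byte mask, and registers the _tag()
+ * callback. The _tag() callbacks copy the matched values into the STE tag
+ * and clear the consumed fields in the match parameter, so that fields no
+ * builder handled can be detected afterwards.
+ */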
+static void
+dr_ste_v0_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ if (mask->smac_47_16 || mask->smac_15_0) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32,
+ mask->smac_47_16 >> 16);
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0,
+ mask->smac_47_16 << 16 | mask->smac_15_0);
+ mask->smac_47_16 = 0;
+ mask->smac_15_0 = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_ONES(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ } else if (mask->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+ mask->svlan_tag = 0;
+ }
+}
+
+static int
+dr_ste_v0_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+ if (spec->smac_47_16 || spec->smac_15_0) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32,
+ spec->smac_47_16 >> 16);
+ MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0,
+ spec->smac_47_16 << 16 | spec->smac_15_0);
+ spec->smac_47_16 = 0;
+ spec->smac_15_0 = 0;
+ }
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio);
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_dst_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_dst_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_src_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag;
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_src, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_mask->inner_second_cvlan_tag ||
+ misc_mask->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->inner_second_cvlan_tag = 0;
+ misc_mask->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_vlan_id, misc_mask, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_cfi, misc_mask, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_priority, misc_mask, inner_second_prio);
+ } else {
+ if (misc_mask->outer_second_cvlan_tag ||
+ misc_mask->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->outer_second_cvlan_tag = 0;
+ misc_mask->outer_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_vlan_id, misc_mask, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_cfi, misc_mask, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask,
+ second_priority, misc_mask, outer_second_prio);
+ }
+}
+
+static int
+dr_ste_v0_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
+ bool inner, u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_spec = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype);
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_spec->inner_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->inner_second_cvlan_tag = 0;
+ } else if (misc_spec->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio);
+ } else {
+ if (misc_spec->outer_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->outer_second_cvlan_tag = 0;
+ } else if (misc_spec->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->outer_second_svlan_tag = 0;
+ }
+ DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio);
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0);
+
+ dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int
+dr_ste_v0_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0);
+
+ return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+static void
+dr_ste_v0_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask);
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_tag;
+}
+
+static void
+dr_ste_v0_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, sb->inner, bit_mask);
+}
+
+static int
+dr_ste_v0_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+ return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+static void
+dr_ste_v0_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_dst_bit_mask(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_dst_tag;
+}
+
+static void
+dr_ste_v0_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_tnl, bit_mask, l3_type, mask, ip_version);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl, bit_mask,
+ l2_tunneling_network_id, (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+}
+
+static int
+dr_ste_v0_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id,
+ (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_tnl_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit);
+ DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, ihl, spec, ipv4_ihl);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_misc_tag;
+}
+
+static int
+dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn);
+ DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit);
+
+ if (sb->inner)
+ DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, inner_ipv6_flow_label);
+ else
+ DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, outer_ipv6_flow_label);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_ipv6_l3_l4_tag;
+}
+
+static int
+dr_ste_v0_build_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ if (sb->inner)
+ DR_STE_SET_MPLS(mpls, misc2, inner, tag);
+ else
+ DR_STE_SET_MPLS(mpls, misc2, outer, tag);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_mpls_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol);
+
+ DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present);
+ DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h);
+ DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l);
+
+ DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present);
+
+ DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_gre_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_GRE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gre_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc_2 = &value->misc2;
+ u32 mpls_hdr;
+
+ if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2)) {
+ mpls_hdr = misc_2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+ misc_2->outer_first_mpls_over_gre_label = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+ misc_2->outer_first_mpls_over_gre_exp = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc_2->outer_first_mpls_over_gre_s_bos = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+ misc_2->outer_first_mpls_over_gre_ttl = 0;
+ } else {
+ mpls_hdr = misc_2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+ misc_2->outer_first_mpls_over_udp_label = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+ misc_2->outer_first_mpls_over_udp_exp = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc_2->outer_first_mpls_over_udp_s_bos = 0;
+ mpls_hdr |= misc_2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+ misc_2->outer_first_mpls_over_udp_ttl = 0;
+ }
+
+ MLX5_SET(ste_flex_parser_0, tag, flex_parser_3, mpls_hdr);
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_udp_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_udp_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_udp_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_udp_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_udp;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask);
+ /* STEs with lookup type FLEX_PARSER_{0/1} include
+ * flex parsers {0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_udp_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_gre_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_gre_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_gre_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_gre_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_gre;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} include
+ * flex parsers {0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_gre_tag;
+}
+
+#define ICMP_TYPE_OFFSET_FIRST_DW 24
+#define ICMP_CODE_OFFSET_FIRST_DW 16
+
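+/* ICMP(v4/v6) is matched through flex parsers: the type and code are packed
+ * into the first parser dword (bits 31:24 and 23:16), and the header data
+ * goes into the second parser dword. The parser IDs come from the FW caps.
+ */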
+static int
+dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc_3 = &value->misc3;
+ u32 *icmp_header_data;
+ int dw0_location;
+ int dw1_location;
+ u8 *parser_ptr;
+ u8 *icmp_type;
+ u8 *icmp_code;
+ bool is_ipv4;
+ u32 icmp_hdr;
+
+ is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3);
+ if (is_ipv4) {
+ icmp_header_data = &misc_3->icmpv4_header_data;
+ icmp_type = &misc_3->icmpv4_type;
+ icmp_code = &misc_3->icmpv4_code;
+ dw0_location = sb->caps->flex_parser_id_icmp_dw0;
+ dw1_location = sb->caps->flex_parser_id_icmp_dw1;
+ } else {
+ icmp_header_data = &misc_3->icmpv6_header_data;
+ icmp_type = &misc_3->icmpv6_type;
+ icmp_code = &misc_3->icmpv6_code;
+ dw0_location = sb->caps->flex_parser_id_icmpv6_dw0;
+ dw1_location = sb->caps->flex_parser_id_icmpv6_dw1;
+ }
+
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw0_location);
+ icmp_hdr = (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) |
+ (*icmp_code << ICMP_CODE_OFFSET_FIRST_DW);
+ *(__be32 *)parser_ptr = cpu_to_be32(icmp_hdr);
+ *icmp_code = 0;
+ *icmp_type = 0;
+
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw1_location);
+ *(__be32 *)parser_ptr = cpu_to_be32(*icmp_header_data);
+ *icmp_header_data = 0;
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ u8 parser_id;
+ bool is_ipv4;
+
+ dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} include
+ * flex parsers {0-3}/{4-7} respectively.
+ */
+ is_ipv4 = DR_MASK_IS_ICMPV4_SET(&mask->misc3);
+ parser_id = is_ipv4 ? sb->caps->flex_parser_id_icmp_dw0 :
+ sb->caps->flex_parser_id_icmpv6_dw0;
+ sb->lu_type = parser_id > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_icmp_tag;
+}
+
+static int
+dr_ste_v0_build_general_purpose_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc_2 = &value->misc2;
+
+ DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
+ misc_2, metadata_reg_a);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_general_purpose_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_GENERAL_PURPOSE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_general_purpose_tag;
+}
+
+static int
+dr_ste_v0_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ if (sb->inner) {
+ DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num);
+ } else {
+ DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num);
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_eth_l4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, sb->rx, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_eth_l4_misc_tag;
+}
+
+static int
+dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_flags, misc3,
+ outer_vxlan_gpe_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_next_protocol, misc3,
+ outer_vxlan_gpe_next_protocol);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_vni, misc3,
+ outer_vxlan_gpe_vni);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask);
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag;
+}
+
+static int
+dr_ste_v0_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_protocol_type, misc, geneve_protocol_type);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_oam, misc, geneve_oam);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_opt_len, misc, geneve_opt_len);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_vni, misc, geneve_vni);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask);
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tag;
+}
+
+static int
+dr_ste_v0_build_register_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
+ DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
+ DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2);
+ DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_register_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_register_0_tag;
+}
+
+static int
+dr_ste_v0_build_register_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
+ DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
+ DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6);
+ DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7);
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_register_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_register_1_tag;
+}
+
+static void
+dr_ste_v0_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
+ DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
+ misc_mask->source_eswitch_owner_vhca_id = 0;
+}
+
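+/* Match on the source vport: source_sqn plus the source GVMI. When the
+ * eswitch owner vhca_id is valid, the owning domain (local or peer) is
+ * selected by it and the vport GVMI is taken from that domain's caps.
+ */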
+static int
+dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ struct mlx5dr_domain *dmn = sb->dmn;
+ struct mlx5dr_domain *vport_dmn;
+ u8 *bit_mask = sb->bit_mask;
+ bool source_gvmi_set;
+
+ DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
+
+ if (sb->vhca_id_valid) {
+ /* Find port GVMI based on the eswitch_owner_vhca_id */
+ if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+ vport_dmn = dmn;
+ else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
+ dmn->peer_dmn->info.caps.gvmi))
+ vport_dmn = dmn->peer_dmn;
+ else
+ return -EINVAL;
+
+ misc->source_eswitch_owner_vhca_id = 0;
+ } else {
+ vport_dmn = dmn;
+ }
+
+ source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi);
+ if (source_gvmi_set) {
+ vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn,
+ misc->source_port);
+ if (!vport_cap) {
+ mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n",
+ misc->source_port);
+ return -EINVAL;
+ }
+
+ if (vport_cap->vport_gvmi)
+ MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
+
+ misc->source_port = 0;
+ }
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_src_gvmi_qpn_tag;
+}
+
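+/* Write one programmed flex parser sample (from misc4) into the tag at the
+ * parser's dword offset. parser_is_used guards against two misc4 fields
+ * selecting the same parser ID.
+ */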
+static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id,
+ u32 *misc4_field_value,
+ bool *parser_is_used,
+ u8 *tag)
+{
+ u32 id = *misc4_field_id;
+ u8 *parser_ptr;
+
+ if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id])
+ return;
+
+ parser_is_used[id] = true;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, id);
+
+ *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value);
+ *misc4_field_id = 0;
+ *misc4_field_value = 0;
+}
+
+static int dr_ste_v0_build_flex_parser_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4;
+ bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {};
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_0,
+ &misc_4_mask->prog_sample_field_value_0,
+ parser_is_used, tag);
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_1,
+ &misc_4_mask->prog_sample_field_value_1,
+ parser_is_used, tag);
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_2,
+ &misc_4_mask->prog_sample_field_value_2,
+ parser_is_used, tag);
+
+ dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_3,
+ &misc_4_mask->prog_sample_field_value_3,
+ parser_is_used, tag);
+
+ return 0;
+}
+
+static void dr_ste_v0_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag;
+}
+
+static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1;
+ dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag;
+}
+
+static int
+dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+ u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+
+ MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3,
+ misc3->geneve_tlv_option_0_data);
+ misc3->geneve_tlv_option_0_data = 0;
+
+ return 0;
+}
+
+static void
+dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} include
+ * flex parsers {0-3}/{4-7} respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ?
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag;
+}
+
+static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+ gtpu_msg_flags, misc3,
+ gtpu_msg_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+ gtpu_msg_type, misc3,
+ gtpu_msg_type);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+ gtpu_teid, misc3,
+ gtpu_teid);
+
+ return 0;
+}
+
+static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_gtpu_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag;
+}
+
+static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc5 *misc5 = &value->misc5;
+
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0);
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1);
+
+ return 0;
+}
+
+static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V0_LU_TYPE_TUNNEL_HEADER;
+ dr_ste_v0_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag;
+}
+
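+/* STE format v0 context: the callback table through which the format
+ * agnostic dr_ste/dr_matcher code builds lookups and actions on devices
+ * that use this STE layout. Exposed via mlx5dr_ste_get_ctx_v0().
+ */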
+static struct mlx5dr_ste_ctx ste_ctx_v0 = {
+ /* Builders */
+ .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init,
+ .build_eth_l3_ipv6_src_init = &dr_ste_v0_build_eth_l3_ipv6_src_init,
+ .build_eth_l3_ipv6_dst_init = &dr_ste_v0_build_eth_l3_ipv6_dst_init,
+ .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_init,
+ .build_eth_l2_src_init = &dr_ste_v0_build_eth_l2_src_init,
+ .build_eth_l2_dst_init = &dr_ste_v0_build_eth_l2_dst_init,
+ .build_eth_l2_tnl_init = &dr_ste_v0_build_eth_l2_tnl_init,
+ .build_eth_l3_ipv4_misc_init = &dr_ste_v0_build_eth_l3_ipv4_misc_init,
+ .build_eth_ipv6_l3_l4_init = &dr_ste_v0_build_eth_ipv6_l3_l4_init,
+ .build_mpls_init = &dr_ste_v0_build_mpls_init,
+ .build_tnl_gre_init = &dr_ste_v0_build_tnl_gre_init,
+ .build_tnl_mpls_init = &dr_ste_v0_build_tnl_mpls_init,
+ .build_tnl_mpls_over_udp_init = &dr_ste_v0_build_tnl_mpls_over_udp_init,
+ .build_tnl_mpls_over_gre_init = &dr_ste_v0_build_tnl_mpls_over_gre_init,
+ .build_icmp_init = &dr_ste_v0_build_icmp_init,
+ .build_general_purpose_init = &dr_ste_v0_build_general_purpose_init,
+ .build_eth_l4_misc_init = &dr_ste_v0_build_eth_l4_misc_init,
+ .build_tnl_vxlan_gpe_init = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init,
+ .build_tnl_geneve_init = &dr_ste_v0_build_flex_parser_tnl_geneve_init,
+ .build_tnl_geneve_tlv_opt_init = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_register_0_init = &dr_ste_v0_build_register_0_init,
+ .build_register_1_init = &dr_ste_v0_build_register_1_init,
+ .build_src_gvmi_qpn_init = &dr_ste_v0_build_src_gvmi_qpn_init,
+ .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init,
+ .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init,
+ .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v0_build_tnl_header_0_1_init,
+ .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init,
+ .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init,
+
+ /* Getters and Setters */
+ .ste_init = &dr_ste_v0_init,
+ .set_next_lu_type = &dr_ste_v0_set_next_lu_type,
+ .get_next_lu_type = &dr_ste_v0_get_next_lu_type,
+ .set_miss_addr = &dr_ste_v0_set_miss_addr,
+ .get_miss_addr = &dr_ste_v0_get_miss_addr,
+ .set_hit_addr = &dr_ste_v0_set_hit_addr,
+ .set_byte_mask = &dr_ste_v0_set_byte_mask,
+ .get_byte_mask = &dr_ste_v0_get_byte_mask,
+
+ /* Actions */
+ .actions_caps = DR_STE_CTX_ACTION_CAP_NONE,
+ .set_actions_rx = &dr_ste_v0_set_actions_rx,
+ .set_actions_tx = &dr_ste_v0_set_actions_tx,
+ .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v0_action_modify_field_arr),
+ .modify_field_arr = dr_ste_v0_action_modify_field_arr,
+ .set_action_set = &dr_ste_v0_set_action_set,
+ .set_action_add = &dr_ste_v0_set_action_add,
+ .set_action_copy = &dr_ste_v0_set_action_copy,
+ .set_action_decap_l3_list = &dr_ste_v0_set_action_decap_l3_list,
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void)
+{
+ return &ste_ctx_v0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
new file mode 100644
index 000000000..ee677a5c7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -0,0 +1,2172 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#include <linux/types.h>
+#include "mlx5_ifc_dr_ste_v1.h"
+#include "dr_ste_v1.h"
+
+#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \
+ ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \
+ DR_STE_V1_LU_TYPE_##lookup_type##_O)
+
+enum dr_ste_v1_entry_format {
+ DR_STE_V1_TYPE_BWC_BYTE = 0x0,
+ DR_STE_V1_TYPE_BWC_DW = 0x1,
+ DR_STE_V1_TYPE_MATCH = 0x2,
+};
+
+/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */
+enum {
+ DR_STE_V1_LU_TYPE_NOP = 0x0000,
+ DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002,
+ DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102,
+ DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003,
+ DR_STE_V1_LU_TYPE_IBL4 = 0x0103,
+ DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004,
+ DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005,
+ DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006,
+ DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007,
+ DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008,
+ DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108,
+ DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009,
+ DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109,
+ DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a,
+ DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b,
+ DR_STE_V1_LU_TYPE_MPLS_O = 0x010b,
+ DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c,
+ DR_STE_V1_LU_TYPE_MPLS_I = 0x010c,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d,
+ DR_STE_V1_LU_TYPE_GRE = 0x010d,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e,
+ DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e,
+ DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f,
+ DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f,
+ DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111,
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112,
+ DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113,
+ DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114,
+ DR_STE_V1_LU_TYPE_INVALID = 0x00ff,
+ DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE,
+};
+
+enum dr_ste_v1_header_anchors {
+ DR_STE_HEADER_ANCHOR_START_OUTER = 0x00,
+ DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02,
+ DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07,
+ DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13,
+ DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19,
+};
+
+enum dr_ste_v1_action_size {
+ DR_STE_ACTION_SINGLE_SZ = 4,
+ DR_STE_ACTION_DOUBLE_SZ = 8,
+ DR_STE_ACTION_TRIPLE_SZ = 12,
+};
+
+enum dr_ste_v1_action_insert_ptr_attr {
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. push vlan) */
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */
+};
+
+enum dr_ste_v1_action_id {
+ DR_STE_V1_ACTION_ID_NOP = 0x00,
+ DR_STE_V1_ACTION_ID_COPY = 0x05,
+ DR_STE_V1_ACTION_ID_SET = 0x06,
+ DR_STE_V1_ACTION_ID_ADD = 0x07,
+ DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09,
+ DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a,
+ DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b,
+ DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c,
+ DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d,
+ DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e,
+ DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f,
+ DR_STE_V1_ACTION_ID_ASO = 0x12,
+ DR_STE_V1_ACTION_ID_TRAILER = 0x13,
+ DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14,
+ DR_STE_V1_ACTION_ID_MAX = 0x21,
+ /* used for special cases */
+ DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22,
+};
+
+enum {
+ DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00,
+ DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01,
+ DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02,
+ DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08,
+ DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09,
+ DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e,
+ DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18,
+ DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e,
+ DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f,
+ DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e,
+ DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f,
+ DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f,
+ DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70,
+ DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b,
+ DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90,
+ DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91,
+};
+
+enum dr_ste_v1_aso_ctx_type {
+ DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2,
+};
+
+static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = {
+ .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15,
+ },
+};
+
+static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type);
+}
+
+void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+{
+ u64 index = miss_addr >> 6;
+
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32, index >> 26);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index);
+}
+
+u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p)
+{
+ u64 index =
+ ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) |
+ ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26);
+
+ return index << 6;
+}
+
+void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask);
+}
+
+u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask);
+}
+
+static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, lu_type >> 8);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF);
+}
+
+void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF);
+}
+
+u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p)
+{
+ u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format);
+ u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx);
+
+ return (mode << 8 | index);
+}
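
Editorial aside (not part of the patch): the helpers above pack values into split STE fields. The 2-byte lookup type is [ definer mode ][ definer index ], and the miss address is stored as a 64-byte-aligned index split into a 26-bit low field and an 8-bit high field; the shift and mask widths below are read directly from the MLX5_SET/MLX5_GET calls in dr_ste_v1_set_miss_addr()/dr_ste_v1_get_miss_addr(), while the function names and the sample address are illustrative only.

/* Minimal user-space sketch of the packing arithmetic used above. */
#include <assert.h>
#include <stdint.h>

/* Lookup type: [ definer mode 1B ][ definer index 1B ] */
static uint16_t pack_lu_type(uint8_t mode, uint8_t index)
{
	return ((uint16_t)mode << 8) | index;
}

/* Miss address: a 64-byte-aligned index split into two STE fields */
static void pack_miss_addr(uint64_t miss_addr, uint32_t *lo_31_6, uint8_t *hi_39_32)
{
	uint64_t index = miss_addr >> 6;	/* 64B aligned */

	*lo_31_6 = index & ((1u << 26) - 1);	/* miss_address_31_6 */
	*hi_39_32 = index >> 26;		/* miss_address_39_32 */
}

static uint64_t unpack_miss_addr(uint32_t lo_31_6, uint8_t hi_39_32)
{
	uint64_t index = (uint64_t)lo_31_6 | ((uint64_t)hi_39_32 << 26);

	return index << 6;
}

int main(void)
{
	uint32_t lo;
	uint8_t hi;

	assert(pack_lu_type(0x01, 0x12) == 0x0112);	/* FLEX_PARSER_1 */
	pack_miss_addr(0x123456780ULL, &lo, &hi);
	assert(unpack_miss_addr(lo, hi) == 0x123456780ULL);
	return 0;
}
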
+
+static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi);
+}
+
+void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
+{
+ u64 index = (icm_addr >> 5) | ht_size;
+
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_39_32_size, index >> 27);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index);
+}
+
+void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi)
+{
+ dr_ste_v1_set_lu_type(hw_ste_p, lu_type);
+ dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
+
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, gvmi, gvmi);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi);
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi);
+}
+
+void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size)
+{
+ u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL;
+ u8 *mask = tag + DR_STE_SIZE_TAG;
+ u8 tmp_tag[DR_STE_SIZE_TAG] = {};
+
+ if (ste_size == DR_STE_SIZE_CTRL)
+ return;
+
+ WARN_ON(ste_size != DR_STE_SIZE);
+
+ /* Backup tag */
+ memcpy(tmp_tag, tag, DR_STE_SIZE_TAG);
+
+ /* Swap mask and tag; both are the same size */
+ memcpy(tag, mask, DR_STE_SIZE_MASK);
+ memcpy(mask, tmp_tag, DR_STE_SIZE_TAG);
+}
+
+static void dr_ste_v1_set_rx_flow_tag(u8 *s_action, u32 flow_tag)
+{
+ MLX5_SET(ste_single_action_flow_tag_v1, s_action, action_id,
+ DR_STE_V1_ACTION_ID_FLOW_TAG);
+ MLX5_SET(ste_single_action_flow_tag_v1, s_action, flow_tag, flow_tag);
+}
+
+static void dr_ste_v1_set_counter_id(u8 *hw_ste_p, u32 ctr_id)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, counter_id, ctr_id);
+}
+
+static void dr_ste_v1_set_reparse(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1);
+}
+
+static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action,
+ u32 reformat_id, int size)
+{
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id,
+ DR_STE_V1_ACTION_ID_INSERT_POINTER);
+ /* The hardware expects the size here in words (2 bytes) */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes,
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP);
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
+ u32 reformat_id,
+ u8 anchor, u8 offset,
+ int size)
+{
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action,
+ action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_anchor, anchor);
+
+ /* The hardware expects the size and offset here in words (2 bytes) */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_offset, offset / 2);
+
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes,
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action,
+ u8 anchor, u8 offset,
+ int size)
+{
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_anchor, anchor);
+
+ /* The hardware expects the size and offset here in words (2 bytes) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, remove_size, size / 2);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_offset, offset / 2);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action,
+ u32 vlan_hdr)
+{
+ MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
+ action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE);
+ /* The hardware expects the offset to the vlan header in words (2 bytes) */
+ MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
+ start_offset, HDR_LEN_L2_MACS >> 1);
+ MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
+ inline_data, vlan_hdr);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num)
+{
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ start_anchor, DR_STE_HEADER_ANCHOR_1ST_VLAN);
+ /* The hardware expects the size here in words (2 bytes) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+ remove_size, (HDR_LEN_L2_VLAN >> 1) * vlans_num);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p,
+ u8 *frst_s_action,
+ u8 *scnd_d_action,
+ u32 reformat_id,
+ int size)
+{
+ /* Remove L2 headers */
+ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER);
+ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, end_anchor,
+ DR_STE_HEADER_ANCHOR_IPV6_IPV4);
+
+ /* Encapsulate with given reformat ID */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, action_id,
+ DR_STE_V1_ACTION_ID_INSERT_POINTER);
+ /* The hardware expects the size here in words (2 bytes) */
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, size, size / 2);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, pointer, reformat_id);
+ MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, attributes,
+ DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action)
+{
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER);
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, decap, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, vni_to_cqe, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, s_action, end_anchor,
+ DR_STE_HEADER_ANCHOR_INNER_MAC);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
+ u8 *s_action,
+ u16 num_of_actions,
+ u32 re_write_index)
+{
+ MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id,
+ DR_STE_V1_ACTION_ID_MODIFY_LIST);
+ MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions,
+ num_of_actions);
+ MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr,
+ re_write_index);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_aso_flow_meter(u8 *d_action,
+ u32 object_id,
+ u32 offset,
+ u8 dest_reg_id,
+ u8 init_color)
+{
+ MLX5_SET(ste_double_action_aso_v1, d_action, action_id,
+ DR_STE_V1_ACTION_ID_ASO);
+ MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_number,
+ object_id + (offset / MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ));
+ /* Convert the reg_c index to the HW 64-bit index */
+ MLX5_SET(ste_double_action_aso_v1, d_action, dest_reg_id,
+ (dest_reg_id - 1) / 2);
+ MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_type,
+ DR_STE_V1_ASO_CTX_TYPE_POLICERS);
+ MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.line_id,
+ offset % MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ);
+ MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.initial_color,
+ init_color);
+}
+
+static void dr_ste_v1_arr_init_next_match(u8 **last_ste,
+ u32 *added_stes,
+ u16 gvmi)
+{
+ u8 *action;
+
+ (*added_stes)++;
+ *last_ste += DR_STE_SIZE;
+ dr_ste_v1_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, 0, gvmi);
+ dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH);
+
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, *last_ste, action);
+ memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action));
+}
+
+void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
+ u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
+ bool allow_modify_hdr = true;
+ bool allow_encap = true;
+
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+
+ /* Check whether vlan_pop and modify_hdr on the same STE is supported */
+ if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY))
+ allow_modify_hdr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
+
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_rewrite_actions(last_ste, action,
+ attr->modify_actions,
+ attr->modify_index);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_encap = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ || !allow_encap) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_encap = true;
+ }
+ dr_ste_v1_set_push_vlan(last_ste, action,
+ attr->vlans.headers[i]);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+ }
+
+ if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) {
+ if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_encap = true;
+ }
+ dr_ste_v1_set_encap(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) {
+ u8 *d_action;
+
+ if (action_sz < DR_STE_ACTION_TRIPLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ d_action = action + DR_STE_ACTION_SINGLE_SZ;
+
+ dr_ste_v1_set_encap_l3(last_ste,
+ action, d_action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_TRIPLE_SZ;
+ action += DR_STE_ACTION_TRIPLE_SZ;
+ } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) {
+ if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_insert_hdr(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_aso_flow_meter(action,
+ attr->aso_flow_meter.obj_id,
+ attr->aso_flow_meter.offset,
+ attr->aso_flow_meter.dest_reg_id,
+ attr->aso_flow_meter.init_color);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
+ dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
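
Editorial aside (not part of the patch): dr_ste_v1_set_actions_tx() above checks action_sz before emitting each action. The first (BWC) STE leaves room for one double action (8 bytes), and whenever the remaining budget is too small, dr_ste_v1_arr_init_next_match() appends a MATCH STE with a 12-byte action area. The sketch below models only that byte accounting; it deliberately ignores the allow_modify_hdr/allow_encap exclusivity flags, which can force a new STE even when bytes remain, and the struct and helper names are invented for the example.

/* Minimal sketch of the per-STE action byte budget. */
#include <stdio.h>

enum {
	ACTION_SINGLE_SZ = 4,	/* DR_STE_ACTION_SINGLE_SZ */
	ACTION_DOUBLE_SZ = 8,	/* DR_STE_ACTION_DOUBLE_SZ */
	ACTION_TRIPLE_SZ = 12,	/* DR_STE_ACTION_TRIPLE_SZ */
};

struct budget {
	unsigned int action_sz;		/* bytes left in the current STE */
	unsigned int added_stes;	/* extra MATCH STEs appended */
};

static void consume(struct budget *b, unsigned int need)
{
	if (b->action_sz < need) {
		/* models dr_ste_v1_arr_init_next_match() */
		b->added_stes++;
		b->action_sz = ACTION_TRIPLE_SZ;
	}
	b->action_sz -= need;
}

int main(void)
{
	/* e.g. pop-vlan (single) + modify-hdr (double) + encap (double) */
	struct budget b = { .action_sz = ACTION_DOUBLE_SZ, .added_stes = 0 };

	consume(&b, ACTION_SINGLE_SZ);
	consume(&b, ACTION_DOUBLE_SZ);	/* 4B left -> new STE appended */
	consume(&b, ACTION_DOUBLE_SZ);	/* 4B left again -> another STE */
	printf("extra STEs: %u, bytes left: %u\n", b.added_stes, b.action_sz);
	return 0;
}
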
+
+void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u32 actions_caps,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes)
+{
+ u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
+ u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
+ bool allow_modify_hdr = true;
+ bool allow_ctr = true;
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
+ dr_ste_v1_set_rewrite_actions(last_ste, action,
+ attr->decap_actions,
+ attr->decap_index);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_modify_hdr = false;
+ allow_ctr = false;
+ } else if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) {
+ dr_ste_v1_set_rx_decap(last_ste, action);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ allow_modify_hdr = false;
+ allow_ctr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TAG]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ allow_ctr = true;
+ }
+ dr_ste_v1_set_rx_flow_tag(action, attr->flow_tag);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ ||
+ !allow_modify_hdr) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+
+ dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ allow_ctr = false;
+
+ /* Check whether vlan_pop and modify_hdr on the same STE is supported */
+ if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY))
+ allow_modify_hdr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ /* Modify header and decapsulation must use different STEs */
+ if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ allow_ctr = true;
+ }
+ dr_ste_v1_set_rewrite_actions(last_ste, action,
+ attr->modify_actions,
+ attr->modify_index);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ ||
+ !allow_modify_hdr) {
+ dr_ste_v1_arr_init_next_match(&last_ste,
+ added_stes,
+ attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+ last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_push_vlan(last_ste, action,
+ attr->vlans.headers[i]);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+ }
+
+ if (action_type_set[DR_ACTION_TYP_CTR]) {
+ /* The counter action is set after decap and before insert_hdr
+ * to exclude the decapped / encapped header, respectively.
+ */
+ if (!allow_ctr) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ }
+ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
+ allow_ctr = false;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_encap(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_modify_hdr = false;
+ } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) {
+ u8 *d_action;
+
+ if (action_sz < DR_STE_ACTION_TRIPLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+
+ d_action = action + DR_STE_ACTION_SINGLE_SZ;
+
+ dr_ste_v1_set_encap_l3(last_ste,
+ action, d_action,
+ attr->reformat.id,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = false;
+ } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) {
+ /* Modify header, decap, and encap must use different STEs */
+ if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_insert_hdr(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ allow_modify_hdr = false;
+ } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+ if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ allow_modify_hdr = true;
+ allow_ctr = true;
+ }
+ dr_ste_v1_set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
+ action_sz -= DR_STE_ACTION_SINGLE_SZ;
+ action += DR_STE_ACTION_SINGLE_SZ;
+ }
+
+ if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) {
+ if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+ dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+ action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+ action_sz = DR_STE_ACTION_TRIPLE_SZ;
+ }
+ dr_ste_v1_set_aso_flow_meter(action,
+ attr->aso_flow_meter.obj_id,
+ attr->aso_flow_meter.offset,
+ attr->aso_flow_meter.dest_reg_id,
+ attr->aso_flow_meter.init_color);
+ action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+ action += DR_STE_ACTION_DOUBLE_SZ;
+ }
+
+ dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
+void dr_ste_v1_set_action_set(u8 *d_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET);
+ MLX5_SET(ste_double_action_set_v1, d_action, destination_dw_offset, hw_field);
+ MLX5_SET(ste_double_action_set_v1, d_action, destination_left_shifter, shifter);
+ MLX5_SET(ste_double_action_set_v1, d_action, destination_length, length);
+ MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data);
+}
+
+void dr_ste_v1_set_action_add(u8 *d_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data)
+{
+ shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD);
+ MLX5_SET(ste_double_action_add_v1, d_action, destination_dw_offset, hw_field);
+ MLX5_SET(ste_double_action_add_v1, d_action, destination_left_shifter, shifter);
+ MLX5_SET(ste_double_action_add_v1, d_action, destination_length, length);
+ MLX5_SET(ste_double_action_add_v1, d_action, add_value, data);
+}
+
+void dr_ste_v1_set_action_copy(u8 *d_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter)
+{
+ dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET;
+ MLX5_SET(ste_double_action_copy_v1, d_action, action_id, DR_STE_V1_ACTION_ID_COPY);
+ MLX5_SET(ste_double_action_copy_v1, d_action, destination_dw_offset, dst_hw_field);
+ MLX5_SET(ste_double_action_copy_v1, d_action, destination_left_shifter, dst_shifter);
+ MLX5_SET(ste_double_action_copy_v1, d_action, destination_length, dst_len);
+ MLX5_SET(ste_double_action_copy_v1, d_action, source_dw_offset, src_hw_field);
+ MLX5_SET(ste_double_action_copy_v1, d_action, source_right_shifter, src_shifter);
+}
+
+#define DR_STE_DECAP_L3_ACTION_NUM 8
+#define DR_STE_L2_HDR_MAX_SZ 20
+
+int dr_ste_v1_set_action_decap_l3_list(void *data,
+ u32 data_sz,
+ u8 *hw_action,
+ u32 hw_action_sz,
+ u16 *used_hw_action_num)
+{
+ u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {};
+ void *data_ptr = padded_data;
+ u16 used_actions = 0;
+ u32 inline_data_sz;
+ u32 i;
+
+ if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM)
+ return -EINVAL;
+
+ inline_data_sz =
+ MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+
+ /* Add alignment padding */
+ memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
+
+ /* Remove the outer L2/L3 headers */
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER);
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, decap, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, vni_to_cqe, 1);
+ MLX5_SET(ste_single_action_remove_header_v1, hw_action, end_anchor,
+ DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4);
+ hw_action += DR_STE_ACTION_DOUBLE_SZ;
+ used_actions++; /* Remove and NOP are a single double action */
+
+ /* Point to the last dword of the header */
+ data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
+
+ /* Add the new header using the inline action, 4 bytes at a time. The header
+ * is added in reverse order to the beginning of the packet to avoid
+ * incorrect parsing by the HW. Since the header is 14B or 18B, an extra
+ * two bytes are padded and later removed.
+ */
+ for (i = 0; i < data_sz / inline_data_sz + 1; i++) {
+ void *addr_inline;
+
+ MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_INSERT_INLINE);
+ /* The hardware expects the offset here in words (2 bytes) */
+ MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
+
+ /* Copy the bytes one by one to avoid endianness problems */
+ addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1,
+ hw_action, inline_data);
+ memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz);
+ hw_action += DR_STE_ACTION_DOUBLE_SZ;
+ used_actions++;
+ }
+
+ /* Remove the first 2 extra bytes */
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id,
+ DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0);
+ /* The hardware expects the size here in words (2 bytes) */
+ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1);
+ used_actions++;
+
+ *used_hw_action_num = used_actions;
+
+ return 0;
+}
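
Editorial aside (not part of the patch): the function above copies the 14- or 18-byte L2 header into a padded buffer so it can be re-inserted 4 bytes at a time, then trims the 2 leading padding bytes with a remove-by-size action. The stand-alone sketch below checks that arithmetic for both header sizes; inline_data_sz is assumed to be 4 bytes per the "4 bytes at a time" comment, whereas the driver derives it from MLX5_FLD_SZ_BYTES().

/* Minimal sketch of the decap-L3 padding and action-count arithmetic. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	const unsigned int inline_data_sz = 4;	/* assumed, see lead-in */
	unsigned int data_sz;

	for (data_sz = 14; data_sz <= 18; data_sz += 4) {
		/* the header is copied into the padded buffer at this offset,
		 * so the front of the buffer holds the 2 padding bytes
		 */
		unsigned int pad_off = data_sz % inline_data_sz;
		/* one insert-inline double action per 4-byte chunk */
		unsigned int inline_actions = data_sz / inline_data_sz + 1;
		/* remove-header + inline inserts + remove-by-size */
		unsigned int used = 1 + inline_actions + 1;

		printf("hdr %uB: pad off %u, %u inline actions, %u total\n",
		       data_sz, pad_off, inline_actions, used);
		assert(pad_off == 2);
		assert(used <= 8);	/* DR_STE_DECAP_L3_ACTION_NUM */
	}
	return 0;
}
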
+
+static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_47_16, mask, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_15_0, mask, smac_15_0);
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_ONES(eth_l2_src_dst_v1, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ } else if (mask->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->svlan_tag = 0;
+ }
+}
+
+static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_15_0, spec, dmac_15_0);
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_47_16, spec, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_15_0, spec, smac_15_0);
+
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else if (spec->ip_version) {
+ return -EINVAL;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_priority, spec, first_prio);
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC_DST, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_dst_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0);
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_DES, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_dst_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0);
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_SRC, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_src_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_address, spec, dst_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_address, spec, src_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, ecn, spec, ip_ecn);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple_v1, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_5_TUPLE, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag;
+}
+
+static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_mask->inner_second_cvlan_tag ||
+ misc_mask->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->inner_second_cvlan_tag = 0;
+ misc_mask->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_vlan_id, misc_mask, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_cfi, misc_mask, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_priority, misc_mask, inner_second_prio);
+ } else {
+ if (misc_mask->outer_second_cvlan_tag ||
+ misc_mask->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->outer_second_cvlan_tag = 0;
+ misc_mask->outer_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_vlan_id, misc_mask, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_cfi, misc_mask, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask,
+ second_priority, misc_mask, outer_second_prio);
+ }
+}
+
+static int dr_ste_v1_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
+ bool inner, u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_spec = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, l3_ethertype, spec, ethertype);
+
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else if (spec->ip_version) {
+ return -EINVAL;
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_spec->inner_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->inner_second_cvlan_tag = 0;
+ } else if (misc_spec->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, inner_second_prio);
+ } else {
+ if (misc_spec->outer_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->outer_second_cvlan_tag = 0;
+ } else if (misc_spec->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->outer_second_svlan_tag = 0;
+ }
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, outer_second_prio);
+ }
+
+ return 0;
+}
+
+static void dr_ste_v1_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_47_16, mask, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_15_0, mask, smac_15_0);
+
+ dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_47_16, spec, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_15_0, spec, smac_15_0);
+
+ return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_tag;
+}
+
+static void dr_ste_v1_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_15_0, spec, dmac_15_0);
+
+ return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
+}
+
+void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_dst_tag;
+}
+
+static void dr_ste_v1_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_15_0, mask, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_ONES(eth_l2_tnl_v1, bit_mask, l3_type, mask, ip_version);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl_v1, bit_mask,
+ l2_tunneling_network_id, (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+}
+
+static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_15_0, spec, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl_v1, tag, l3_ethertype, spec, ethertype);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, l2_tunneling_network_id,
+ (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else if (spec->ip_version) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_ETHL2_TNL;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_tnl_tag;
+}
+
+static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit);
+ DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, ihl, spec, ipv4_ihl);
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_MISC, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_misc_tag;
+}
+
+static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l4_v1, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l4_v1, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l4_v1, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l4_v1, tag, ecn, spec, ip_ecn);
+ DR_STE_SET_TAG(eth_l4_v1, tag, ipv6_hop_limit, spec, ttl_hoplimit);
+
+ if (sb->inner)
+ DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, inner_ipv6_flow_label);
+ else
+ DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, outer_ipv6_flow_label);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l4_v1, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL4, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_ipv6_l3_l4_tag;
+}
+
+static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ if (sb->inner)
+ DR_STE_SET_MPLS(mpls_v1, misc2, inner, tag);
+ else
+ DR_STE_SET_MPLS(mpls_v1, misc2, outer, tag);
+
+ return 0;
+}
+
+void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_CALC_DFNR_TYPE(MPLS, sb->inner);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_mpls_tag;
+}
+
+static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(gre_v1, tag, gre_protocol, misc, gre_protocol);
+ DR_STE_SET_TAG(gre_v1, tag, gre_k_present, misc, gre_k_present);
+ DR_STE_SET_TAG(gre_v1, tag, gre_key_h, misc, gre_key_h);
+ DR_STE_SET_TAG(gre_v1, tag, gre_key_l, misc, gre_key_l);
+
+ DR_STE_SET_TAG(gre_v1, tag, gre_c_present, misc, gre_c_present);
+ DR_STE_SET_TAG(gre_v1, tag, gre_s_present, misc, gre_s_present);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_GRE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gre_tag;
+}
+
+static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc2)) {
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_label,
+ misc2, outer_first_mpls_over_gre_label);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp,
+ misc2, outer_first_mpls_over_gre_exp);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos,
+ misc2, outer_first_mpls_over_gre_s_bos);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl,
+ misc2, outer_first_mpls_over_gre_ttl);
+ } else {
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_label,
+ misc2, outer_first_mpls_over_udp_label);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp,
+ misc2, outer_first_mpls_over_udp_exp);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos,
+ misc2, outer_first_mpls_over_udp_s_bos);
+
+ DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl,
+ misc2, outer_first_mpls_over_udp_ttl);
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_MPLS_I;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag;
+}
+
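+/* Pack the MPLS-over-UDP label/exp/s_bos/ttl fields into a single 32-bit
+ * MPLS header word and write it at the flex parser slot given by the
+ * flex_parser_id_mpls_over_udp capability.
+ */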
+static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_udp_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_udp_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_udp_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_udp_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_udp;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} include
+ * flex parsers {0-3}/{4-7}, respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_udp_tag;
+}
+
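+/* Same as the MPLS-over-UDP builder above, but using the MPLS-over-GRE
+ * flex parser id from the caps.
+ */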
+static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *parser_ptr;
+ u8 parser_id;
+ u32 mpls_hdr;
+
+ mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+ misc2->outer_first_mpls_over_gre_label = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+ misc2->outer_first_mpls_over_gre_exp = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+ misc2->outer_first_mpls_over_gre_s_bos = 0;
+ mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+ misc2->outer_first_mpls_over_gre_ttl = 0;
+
+ parser_id = sb->caps->flex_parser_id_mpls_over_gre;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+ *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} include
+ * flex parsers {0-3}/{4-7}, respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_gre_tag;
+}
+
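+/* A single builder handles both ICMPv4 and ICMPv6; the misc3 fields to
+ * consume are selected by DR_MASK_IS_ICMPV4_SET().
+ */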
+static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ bool is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc3);
+ u32 *icmp_header_data;
+ u8 *icmp_type;
+ u8 *icmp_code;
+
+ if (is_ipv4) {
+ icmp_header_data = &misc3->icmpv4_header_data;
+ icmp_type = &misc3->icmpv4_type;
+ icmp_code = &misc3->icmpv4_code;
+ } else {
+ icmp_header_data = &misc3->icmpv6_header_data;
+ icmp_type = &misc3->icmpv6_type;
+ icmp_code = &misc3->icmpv6_code;
+ }
+
+ MLX5_SET(ste_icmp_v1, tag, icmp_header_data, *icmp_header_data);
+ MLX5_SET(ste_icmp_v1, tag, icmp_type, *icmp_type);
+ MLX5_SET(ste_icmp_v1, tag, icmp_code, *icmp_code);
+
+ *icmp_header_data = 0;
+ *icmp_type = 0;
+ *icmp_code = 0;
+
+ return 0;
+}
+
+void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag;
+}
+
+static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
+ misc2, metadata_reg_a);
+
+ return 0;
+}
+
+void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_GENERAL_PURPOSE;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_general_purpose_tag;
+}
+
+static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ if (sb->inner) {
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, inner_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, inner_tcp_ack_num);
+ } else {
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, outer_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, outer_tcp_ack_num);
+ }
+
+ return 0;
+}
+
+void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_eth_l4_misc_tag;
+}
+
+static int
+dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_flags, misc3,
+ outer_vxlan_gpe_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_next_protocol, misc3,
+ outer_vxlan_gpe_next_protocol);
+ DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
+ outer_vxlan_gpe_vni, misc3,
+ outer_vxlan_gpe_vni);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag;
+}
+
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_protocol_type, misc, geneve_protocol_type);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_oam, misc, geneve_oam);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_opt_len, misc, geneve_opt_len);
+ DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
+ geneve_vni, misc, geneve_vni);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag;
+}
+
+static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc5 *misc5 = &value->misc5;
+
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0);
+ DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1);
+
+ return 0;
+}
+
+void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_header_0_1_tag;
+}
+
+static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
+ DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
+ DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2);
+ DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3);
+
+ return 0;
+}
+
+void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_register_0_tag;
+}
+
+static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+
+ DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
+ DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
+ DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6);
+ DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7);
+
+ return 0;
+}
+
+void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_register_1_tag;
+}
+
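+/* Match on the packet source: source_port is translated into the vport GVMI
+ * (possibly resolved on the peer eswitch domain) and source_sqn is matched
+ * as the source QP.
+ */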
+static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port);
+ DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn);
+ misc_mask->source_eswitch_owner_vhca_id = 0;
+}
+
+static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc *misc = &value->misc;
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ struct mlx5dr_domain *dmn = sb->dmn;
+ struct mlx5dr_domain *vport_dmn;
+ u8 *bit_mask = sb->bit_mask;
+
+ DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn);
+
+ if (sb->vhca_id_valid) {
+ /* Find port GVMI based on the eswitch_owner_vhca_id */
+ if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+ vport_dmn = dmn;
+ else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
+ dmn->peer_dmn->info.caps.gvmi))
+ vport_dmn = dmn->peer_dmn;
+ else
+ return -EINVAL;
+
+ misc->source_eswitch_owner_vhca_id = 0;
+ } else {
+ vport_dmn = dmn;
+ }
+
+ if (!MLX5_GET(ste_src_gvmi_qp_v1, bit_mask, source_gvmi))
+ return 0;
+
+ vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, misc->source_port);
+ if (!vport_cap) {
+ mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n",
+ misc->source_port);
+ return -EINVAL;
+ }
+
+ if (vport_cap->vport_gvmi)
+ MLX5_SET(ste_src_gvmi_qp_v1, tag, source_gvmi, vport_cap->vport_gvmi);
+
+ misc->source_port = 0;
+ return 0;
+}
+
+void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_SRC_QP_GVMI;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag;
+}
+
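+/* Write one programmed flex parser sample value at the offset derived from
+ * its parser id and clear the consumed misc4 fields; out-of-range or
+ * already-used parser ids are silently skipped.
+ */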
+static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id,
+ u32 *misc4_field_value,
+ bool *parser_is_used,
+ u8 *tag)
+{
+ u32 id = *misc4_field_id;
+ u8 *parser_ptr;
+
+ if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id])
+ return;
+
+ parser_is_used[id] = true;
+ parser_ptr = dr_ste_calc_flex_parser_offset(tag, id);
+
+ *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value);
+ *misc4_field_id = 0;
+ *misc4_field_value = 0;
+}
+
+static int dr_ste_v1_build_flex_parser_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4;
+ bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {};
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_0,
+ &misc_4_mask->prog_sample_field_value_0,
+ parser_is_used, tag);
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_1,
+ &misc_4_mask->prog_sample_field_value_1,
+ parser_is_used, tag);
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_2,
+ &misc_4_mask->prog_sample_field_value_2,
+ parser_is_used, tag);
+
+ dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_3,
+ &misc_4_mask->prog_sample_field_value_3,
+ parser_is_used, tag);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+ dr_ste_v1_build_flex_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tag;
+}
+
+void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1;
+ dr_ste_v1_build_flex_parser_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tag;
+}
+
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+ u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+
+ MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3,
+ misc3->geneve_tlv_option_0_data);
+ misc3->geneve_tlv_option_0_data = 0;
+
+ return 0;
+}
+
+void
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask);
+
+ /* STEs with lookup type FLEX_PARSER_{0/1} include
+ * flex parsers {0-3}/{4-7}, respectively.
+ */
+ sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > DR_STE_MAX_FLEX_0_ID ?
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag;
+}
+
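+/* Match only on the existence of geneve TLV option 0 by setting the bit of
+ * the option's flex parser in the flex_parsers_ok field.
+ */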
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ if (misc->geneve_tlv_option_0_exist) {
+ MLX5_SET(ste_flex_parser_ok, tag, flex_parsers_ok, 1 << parser_id);
+ misc->geneve_tlv_option_0_exist = 0;
+ }
+
+ return 0;
+}
+
+void
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_OK;
+ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(mask, sb, sb->bit_mask);
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag;
+}
+
+static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_flags, misc3, gtpu_msg_flags);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_type, misc3, gtpu_msg_type);
+ DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_teid, misc3, gtpu_teid);
+
+ return 0;
+}
+
+void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_gtpu_tag;
+}
+
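+/* The caps may spread the GTP-U fields across flex parsers 0-3 and 4-7; each
+ * of the two builders below programs only the fields whose parser id falls
+ * within its own range.
+ */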
+static int
+dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+void
+dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag;
+}
+
+static int
+dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag)
+{
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+ if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+ DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+ return 0;
+}
+
+void
+dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask)
+{
+ dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask);
+
+ sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1;
+ sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag;
+}
+
+static struct mlx5dr_ste_ctx ste_ctx_v1 = {
+ /* Builders */
+ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
+ .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init,
+ .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init,
+ .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init,
+ .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init,
+ .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init,
+ .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init,
+ .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init,
+ .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init,
+ .build_mpls_init = &dr_ste_v1_build_mpls_init,
+ .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init,
+ .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init,
+ .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init,
+ .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init,
+ .build_icmp_init = &dr_ste_v1_build_icmp_init,
+ .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init,
+ .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init,
+ .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init,
+ .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init,
+ .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_tnl_geneve_tlv_opt_exist_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init,
+ .build_register_0_init = &dr_ste_v1_build_register_0_init,
+ .build_register_1_init = &dr_ste_v1_build_register_1_init,
+ .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init,
+ .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init,
+ .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init,
+ .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init,
+ .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init,
+ .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init,
+
+ /* Getters and Setters */
+ .ste_init = &dr_ste_v1_init,
+ .set_next_lu_type = &dr_ste_v1_set_next_lu_type,
+ .get_next_lu_type = &dr_ste_v1_get_next_lu_type,
+ .set_miss_addr = &dr_ste_v1_set_miss_addr,
+ .get_miss_addr = &dr_ste_v1_get_miss_addr,
+ .set_hit_addr = &dr_ste_v1_set_hit_addr,
+ .set_byte_mask = &dr_ste_v1_set_byte_mask,
+ .get_byte_mask = &dr_ste_v1_get_byte_mask,
+ /* Actions */
+ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP |
+ DR_STE_CTX_ACTION_CAP_RX_PUSH |
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP |
+ DR_STE_CTX_ACTION_CAP_POP_MDFY,
+ .set_actions_rx = &dr_ste_v1_set_actions_rx,
+ .set_actions_tx = &dr_ste_v1_set_actions_tx,
+ .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr),
+ .modify_field_arr = dr_ste_v1_action_modify_field_arr,
+ .set_action_set = &dr_ste_v1_set_action_set,
+ .set_action_add = &dr_ste_v1_set_action_add,
+ .set_action_copy = &dr_ste_v1_set_action_copy,
+ .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+ /* Send */
+ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void)
+{
+ return &ste_ctx_v1;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
new file mode 100644
index 000000000..8a1d49790
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef _DR_STE_V1_
+#define _DR_STE_V1_
+
+#include "dr_types.h"
+#include "dr_ste.h"
+
+void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr);
+u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p);
+void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask);
+u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p);
+void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type);
+u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p);
+void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size);
+void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi);
+void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size);
+void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set,
+ u32 actions_caps, u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr, u32 *added_stes);
+void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set,
+ u32 actions_caps, u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr, u32 *added_stes);
+void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter,
+ u8 length, u32 data);
+void dr_ste_v1_set_action_add(u8 *d_action, u8 hw_field, u8 shifter,
+ u8 length, u32 data);
+void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter,
+ u8 dst_len, u8 src_hw_field, u8 src_shifter);
+int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action,
+ u32 hw_action_sz, u16 *used_hw_action_num);
+void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask);
+
+#endif /* _DR_STE_V1_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
new file mode 100644
index 000000000..c60fddd12
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "dr_ste_v1.h"
+
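+/* The STE v2 context reuses the v1 builders, getters/setters and action
+ * handlers; it differs only in the action-modify field mapping below and in
+ * the advertised action capabilities (no POP_MDFY).
+ */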
+enum {
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00,
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01,
+ DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02,
+ DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08,
+ DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09,
+ DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e,
+ DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18,
+ DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e,
+ DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f,
+ DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e,
+ DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f,
+ DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f,
+ DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70,
+ DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b,
+ DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94,
+ DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95,
+};
+
+static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15,
+ .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31,
+ .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = {
+ .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15,
+ },
+};
+
+static struct mlx5dr_ste_ctx ste_ctx_v2 = {
+ /* Builders */
+ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init,
+ .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init,
+ .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init,
+ .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init,
+ .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init,
+ .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init,
+ .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init,
+ .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init,
+ .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init,
+ .build_mpls_init = &dr_ste_v1_build_mpls_init,
+ .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init,
+ .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init,
+ .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init,
+ .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init,
+ .build_icmp_init = &dr_ste_v1_build_icmp_init,
+ .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init,
+ .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init,
+ .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init,
+ .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init,
+ .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init,
+ .build_tnl_geneve_tlv_opt_exist_init =
+ &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init,
+ .build_register_0_init = &dr_ste_v1_build_register_0_init,
+ .build_register_1_init = &dr_ste_v1_build_register_1_init,
+ .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init,
+ .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init,
+ .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init,
+ .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init,
+ .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init,
+ .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init,
+ .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init,
+
+ /* Getters and Setters */
+ .ste_init = &dr_ste_v1_init,
+ .set_next_lu_type = &dr_ste_v1_set_next_lu_type,
+ .get_next_lu_type = &dr_ste_v1_get_next_lu_type,
+ .set_miss_addr = &dr_ste_v1_set_miss_addr,
+ .get_miss_addr = &dr_ste_v1_get_miss_addr,
+ .set_hit_addr = &dr_ste_v1_set_hit_addr,
+ .set_byte_mask = &dr_ste_v1_set_byte_mask,
+ .get_byte_mask = &dr_ste_v1_get_byte_mask,
+
+ /* Actions */
+ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP |
+ DR_STE_CTX_ACTION_CAP_RX_PUSH |
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP,
+ .set_actions_rx = &dr_ste_v1_set_actions_rx,
+ .set_actions_tx = &dr_ste_v1_set_actions_tx,
+ .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr),
+ .modify_field_arr = dr_ste_v2_action_modify_field_arr,
+ .set_action_set = &dr_ste_v1_set_action_set,
+ .set_action_add = &dr_ste_v1_set_action_add,
+ .set_action_copy = &dr_ste_v1_set_action_copy,
+ .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list,
+
+ /* Send */
+ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
+};
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void)
+{
+ return &ste_ctx_v2;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
new file mode 100644
index 000000000..f68461b13
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
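+/* Re-point the last hash table of this NIC table (the last matcher's end
+ * anchor, or the table's start anchor if there are no matchers) at the new
+ * miss address: the destination table's start anchor when a forward action
+ * is given, otherwise the domain default.
+ */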
+static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl,
+ struct mlx5dr_action *action)
+{
+ struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *last_htbl;
+ struct mlx5dr_icm_chunk *chunk;
+ int ret;
+
+ if (!list_empty(&nic_tbl->nic_matcher_list))
+ last_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list,
+ struct mlx5dr_matcher_rx_tx,
+ list_node);
+
+ if (last_nic_matcher)
+ last_htbl = last_nic_matcher->e_anchor;
+ else
+ last_htbl = nic_tbl->s_anchor;
+
+ if (action) {
+ chunk = nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ?
+ action->dest_tbl->tbl->rx.s_anchor->chunk :
+ action->dest_tbl->tbl->tx.s_anchor->chunk;
+ nic_tbl->default_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk);
+ } else {
+ nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr;
+ }
+
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_tbl->default_icm_addr;
+
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_tbl->nic_dmn,
+ last_htbl, &info, true);
+ if (ret)
+ mlx5dr_dbg(dmn, "Failed to set NIC RX/TX miss action, ret %d\n", ret);
+
+ return ret;
+}
+
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_action *action)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (action && action->action_type != DR_ACTION_TYP_FT)
+ return -EOPNOTSUPP;
+
+ mlx5dr_domain_lock(tbl->dmn);
+
+ if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX ||
+ tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
+ ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->rx, action);
+ if (ret)
+ goto out;
+ }
+
+ if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX ||
+ tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
+ ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->tx, action);
+ if (ret)
+ goto out;
+ }
+
+ if (ret)
+ goto out;
+
+ /* Release old action */
+ if (tbl->miss_action)
+ refcount_dec(&tbl->miss_action->refcount);
+
+ /* Set new miss action */
+ tbl->miss_action = action;
+ if (tbl->miss_action)
+ refcount_inc(&action->refcount);
+
+out:
+ mlx5dr_domain_unlock(tbl->dmn);
+ return ret;
+}
+
+static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl)
+{
+ mlx5dr_htbl_put(nic_tbl->s_anchor);
+}
+
+static void dr_table_uninit_fdb(struct mlx5dr_table *tbl)
+{
+ dr_table_uninit_nic(&tbl->rx);
+ dr_table_uninit_nic(&tbl->tx);
+}
+
+static void dr_table_uninit(struct mlx5dr_table *tbl)
+{
+ mlx5dr_domain_lock(tbl->dmn);
+
+ switch (tbl->dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ dr_table_uninit_nic(&tbl->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ dr_table_uninit_nic(&tbl->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ dr_table_uninit_fdb(tbl);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ mlx5dr_domain_unlock(tbl->dmn);
+}
+
+static int dr_table_init_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+ struct mlx5dr_htbl_connect_info info;
+ int ret;
+
+ INIT_LIST_HEAD(&nic_tbl->nic_matcher_list);
+
+ nic_tbl->default_icm_addr = nic_dmn->default_icm_addr;
+
+ nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ MLX5DR_STE_LU_TYPE_DONT_CARE,
+ 0);
+ if (!nic_tbl->s_anchor) {
+ mlx5dr_err(dmn, "Failed allocating htbl\n");
+ return -ENOMEM;
+ }
+
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_dmn->default_icm_addr;
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+ nic_tbl->s_anchor,
+ &info, true);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed int and send htbl\n");
+ goto free_s_anchor;
+ }
+
+ mlx5dr_htbl_get(nic_tbl->s_anchor);
+
+ return 0;
+
+free_s_anchor:
+ mlx5dr_ste_htbl_free(nic_tbl->s_anchor);
+ return ret;
+}
+
+static int dr_table_init_fdb(struct mlx5dr_table *tbl)
+{
+ int ret;
+
+ ret = dr_table_init_nic(tbl->dmn, &tbl->rx);
+ if (ret)
+ return ret;
+
+ ret = dr_table_init_nic(tbl->dmn, &tbl->tx);
+ if (ret)
+ goto destroy_rx;
+
+ return 0;
+
+destroy_rx:
+ dr_table_uninit_nic(&tbl->rx);
+ return ret;
+}
+
+static int dr_table_init(struct mlx5dr_table *tbl)
+{
+ int ret = 0;
+
+ INIT_LIST_HEAD(&tbl->matcher_list);
+
+ mlx5dr_domain_lock(tbl->dmn);
+
+ switch (tbl->dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX;
+ tbl->rx.nic_dmn = &tbl->dmn->info.rx;
+ ret = dr_table_init_nic(tbl->dmn, &tbl->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX;
+ tbl->tx.nic_dmn = &tbl->dmn->info.tx;
+ ret = dr_table_init_nic(tbl->dmn, &tbl->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB;
+ tbl->rx.nic_dmn = &tbl->dmn->info.rx;
+ tbl->tx.nic_dmn = &tbl->dmn->info.tx;
+ ret = dr_table_init_fdb(tbl);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ mlx5dr_domain_unlock(tbl->dmn);
+
+ return ret;
+}
+
+static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl)
+{
+ return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev,
+ tbl->table_id,
+ tbl->table_type);
+}
+
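+/* Create the FW flow table object that fronts this SW-steering table:
+ * sw_owner is set, the level is placed just below max_ft_level, and the
+ * RX/TX ICM addresses point at the s_anchor hash tables built in
+ * dr_table_init().
+ */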
+static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl, u16 uid)
+{
+ bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
+ bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+ struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
+ u64 icm_addr_rx = 0;
+ u64 icm_addr_tx = 0;
+ int ret;
+
+ if (tbl->rx.s_anchor)
+ icm_addr_rx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->rx.s_anchor->chunk);
+
+ if (tbl->tx.s_anchor)
+ icm_addr_tx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->tx.s_anchor->chunk);
+
+ ft_attr.table_type = tbl->table_type;
+ ft_attr.icm_addr_rx = icm_addr_rx;
+ ft_attr.icm_addr_tx = icm_addr_tx;
+ ft_attr.level = tbl->dmn->info.caps.max_ft_level - 1;
+ ft_attr.sw_owner = true;
+ ft_attr.decap_en = en_decap;
+ ft_attr.reformat_en = en_encap;
+ ft_attr.uid = uid;
+
+ ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr,
+ NULL, &tbl->table_id);
+
+ return ret;
+}
+
+struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level,
+ u32 flags, u16 uid)
+{
+ struct mlx5dr_table *tbl;
+ int ret;
+
+ refcount_inc(&dmn->refcount);
+
+ tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
+ if (!tbl)
+ goto dec_ref;
+
+ tbl->dmn = dmn;
+ tbl->level = level;
+ tbl->flags = flags;
+ refcount_set(&tbl->refcount, 1);
+
+ ret = dr_table_init(tbl);
+ if (ret)
+ goto free_tbl;
+
+ ret = dr_table_create_sw_owned_tbl(tbl, uid);
+ if (ret)
+ goto uninit_tbl;
+
+ INIT_LIST_HEAD(&tbl->dbg_node);
+ mlx5dr_dbg_tbl_add(tbl);
+ return tbl;
+
+uninit_tbl:
+ dr_table_uninit(tbl);
+free_tbl:
+ kfree(tbl);
+dec_ref:
+ refcount_dec(&dmn->refcount);
+ return NULL;
+}
+
+int mlx5dr_table_destroy(struct mlx5dr_table *tbl)
+{
+ int ret;
+
+ if (WARN_ON_ONCE(refcount_read(&tbl->refcount) > 1))
+ return -EBUSY;
+
+ mlx5dr_dbg_tbl_del(tbl);
+ ret = dr_table_destroy_sw_owned_tbl(tbl);
+ if (ret)
+ return ret;
+
+ dr_table_uninit(tbl);
+
+ if (tbl->miss_action)
+ refcount_dec(&tbl->miss_action->refcount);
+
+ refcount_dec(&tbl->dmn->refcount);
+ kfree(tbl);
+
+ return ret;
+}
+
+u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl)
+{
+ return tbl->table_id;
+}
+
+struct mlx5dr_table *mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft)
+{
+ return ft->fs_dr_table.dr_table;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
new file mode 100644
index 000000000..1777a1e50
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -0,0 +1,1472 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef _DR_TYPES_
+#define _DR_TYPES_
+
+#include <linux/mlx5/vport.h>
+#include <linux/refcount.h>
+#include "fs_core.h"
+#include "wq.h"
+#include "lib/mlx5.h"
+#include "mlx5_ifc_dr.h"
+#include "mlx5dr.h"
+#include "dr_dbg.h"
+
+#define DR_RULE_MAX_STES 18
+#define DR_ACTION_MAX_STES 5
+#define DR_STE_SVLAN 0x1
+#define DR_STE_CVLAN 0x2
+#define DR_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4)
+#define DR_NUM_OF_FLEX_PARSERS 8
+#define DR_STE_MAX_FLEX_0_ID 3
+#define DR_STE_MAX_FLEX_1_ID 7
+
+#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg)
+#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
+#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg)
+
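+/* Flex parsers 0-3 are reached via lookup type FLEX_PARSER_0, parsers 4-7
+ * via FLEX_PARSER_1.
+ */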
+static inline bool dr_is_flex_parser_0_id(u8 parser_id)
+{
+ return parser_id <= DR_STE_MAX_FLEX_0_ID;
+}
+
+static inline bool dr_is_flex_parser_1_id(u8 parser_id)
+{
+ return parser_id > DR_STE_MAX_FLEX_0_ID;
+}
+
+enum mlx5dr_icm_chunk_size {
+ DR_CHUNK_SIZE_1,
+ DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */
+ DR_CHUNK_SIZE_2,
+ DR_CHUNK_SIZE_4,
+ DR_CHUNK_SIZE_8,
+ DR_CHUNK_SIZE_16,
+ DR_CHUNK_SIZE_32,
+ DR_CHUNK_SIZE_64,
+ DR_CHUNK_SIZE_128,
+ DR_CHUNK_SIZE_256,
+ DR_CHUNK_SIZE_512,
+ DR_CHUNK_SIZE_1K,
+ DR_CHUNK_SIZE_2K,
+ DR_CHUNK_SIZE_4K,
+ DR_CHUNK_SIZE_8K,
+ DR_CHUNK_SIZE_16K,
+ DR_CHUNK_SIZE_32K,
+ DR_CHUNK_SIZE_64K,
+ DR_CHUNK_SIZE_128K,
+ DR_CHUNK_SIZE_256K,
+ DR_CHUNK_SIZE_512K,
+ DR_CHUNK_SIZE_1024K,
+ DR_CHUNK_SIZE_2048K,
+ DR_CHUNK_SIZE_MAX,
+};
+
+enum mlx5dr_icm_type {
+ DR_ICM_TYPE_STE,
+ DR_ICM_TYPE_MODIFY_ACTION,
+};
+
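+/* Each chunk size step doubles the number of entries, so growing by two
+ * steps quadruples the chunk; the result is capped at DR_CHUNK_SIZE_MAX.
+ */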
+static inline enum mlx5dr_icm_chunk_size
+mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk)
+{
+ chunk += 2;
+ if (chunk < DR_CHUNK_SIZE_MAX)
+ return chunk;
+
+ return DR_CHUNK_SIZE_MAX;
+}
+
+enum {
+ DR_STE_SIZE = 64,
+ DR_STE_SIZE_CTRL = 32,
+ DR_STE_SIZE_TAG = 16,
+ DR_STE_SIZE_MASK = 16,
+ DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
+};
+
+enum mlx5dr_ste_ctx_action_cap {
+ DR_STE_CTX_ACTION_CAP_NONE = 0,
+ DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0,
+ DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1,
+ DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2,
+ DR_STE_CTX_ACTION_CAP_POP_MDFY = 1 << 3,
+};
+
+enum {
+ DR_MODIFY_ACTION_SIZE = 8,
+};
+
+enum mlx5dr_matcher_criteria {
+ DR_MATCHER_CRITERIA_EMPTY = 0,
+ DR_MATCHER_CRITERIA_OUTER = 1 << 0,
+ DR_MATCHER_CRITERIA_MISC = 1 << 1,
+ DR_MATCHER_CRITERIA_INNER = 1 << 2,
+ DR_MATCHER_CRITERIA_MISC2 = 1 << 3,
+ DR_MATCHER_CRITERIA_MISC3 = 1 << 4,
+ DR_MATCHER_CRITERIA_MISC4 = 1 << 5,
+ DR_MATCHER_CRITERIA_MISC5 = 1 << 6,
+ DR_MATCHER_CRITERIA_MAX = 1 << 7,
+};
+
+enum mlx5dr_action_type {
+ DR_ACTION_TYP_TNL_L2_TO_L2,
+ DR_ACTION_TYP_L2_TO_TNL_L2,
+ DR_ACTION_TYP_TNL_L3_TO_L2,
+ DR_ACTION_TYP_L2_TO_TNL_L3,
+ DR_ACTION_TYP_DROP,
+ DR_ACTION_TYP_QP,
+ DR_ACTION_TYP_FT,
+ DR_ACTION_TYP_CTR,
+ DR_ACTION_TYP_TAG,
+ DR_ACTION_TYP_MODIFY_HDR,
+ DR_ACTION_TYP_VPORT,
+ DR_ACTION_TYP_POP_VLAN,
+ DR_ACTION_TYP_PUSH_VLAN,
+ DR_ACTION_TYP_INSERT_HDR,
+ DR_ACTION_TYP_REMOVE_HDR,
+ DR_ACTION_TYP_SAMPLER,
+ DR_ACTION_TYP_ASO_FLOW_METER,
+ DR_ACTION_TYP_MAX,
+};
+
+enum mlx5dr_ipv {
+ DR_RULE_IPV4,
+ DR_RULE_IPV6,
+ DR_RULE_IPV_MAX,
+};
+
+struct mlx5dr_icm_pool;
+struct mlx5dr_icm_chunk;
+struct mlx5dr_icm_buddy_mem;
+struct mlx5dr_ste_htbl;
+struct mlx5dr_match_param;
+struct mlx5dr_cmd_caps;
+struct mlx5dr_rule_rx_tx;
+struct mlx5dr_matcher_rx_tx;
+struct mlx5dr_ste_ctx;
+
+struct mlx5dr_ste {
+ /* refcount: indicates the number of rules that are using this ste */
+ u32 refcount;
+
+ /* this ste is part of a rule, located in ste's chain */
+ u8 ste_chain_location;
+
+ /* attached to the miss_list head at each htbl entry */
+ struct list_head miss_list_node;
+
+ /* this ste is a member of htbl */
+ struct mlx5dr_ste_htbl *htbl;
+
+ struct mlx5dr_ste_htbl *next_htbl;
+
+ /* The rule this STE belongs to */
+ struct mlx5dr_rule_rx_tx *rule_rx_tx;
+};
+
+struct mlx5dr_ste_htbl_ctrl {
+ /* total number of valid entries belonging to this hash table. This
+ * includes both the non-collision and collision entries
+ */
+ unsigned int num_of_valid_entries;
+
+ /* total number of collision entries attached to this table */
+ unsigned int num_of_collisions;
+};
+
+struct mlx5dr_ste_htbl {
+ u16 lu_type;
+ u16 byte_mask;
+ u32 refcount;
+ struct mlx5dr_icm_chunk *chunk;
+ struct mlx5dr_ste *pointing_ste;
+ struct mlx5dr_ste_htbl_ctrl ctrl;
+};
+
+struct mlx5dr_ste_send_info {
+ struct mlx5dr_ste *ste;
+ struct list_head send_list;
+ u16 size;
+ u16 offset;
+ u8 data_cont[DR_STE_SIZE];
+ u8 *data;
+};
+
+void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
+ u16 offset, u8 *data,
+ struct mlx5dr_ste_send_info *ste_info,
+ struct list_head *send_list,
+ bool copy_data);
+
+struct mlx5dr_ste_build {
+ u8 inner:1;
+ u8 rx:1;
+ u8 vhca_id_valid:1;
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_cmd_caps *caps;
+ u16 lu_type;
+ u16 byte_mask;
+ u8 bit_mask[DR_STE_SIZE_MASK];
+ int (*ste_build_tag_func)(struct mlx5dr_match_param *spec,
+ struct mlx5dr_ste_build *sb,
+ u8 *tag);
+};
+
+struct mlx5dr_ste_htbl *
+mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ u16 lu_type, u16 byte_mask);
+
+int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl);
+
+static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl)
+{
+ htbl->refcount--;
+ if (!htbl->refcount)
+ mlx5dr_ste_htbl_free(htbl);
+}
+
+static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl)
+{
+ htbl->refcount++;
+}
+
+/* STE utils */
+u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl);
+void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 miss_addr);
+void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste, u64 icm_addr, u32 ht_size);
+void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste,
+ struct mlx5dr_ste_htbl *next_htbl);
+void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask);
+bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 ste_location);
+u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste);
+u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste);
+struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste);
+
+#define MLX5DR_MAX_VLANS 2
+
+struct mlx5dr_ste_actions_attr {
+ u32 modify_index;
+ u16 modify_actions;
+ u32 decap_index;
+ u16 decap_actions;
+ u8 decap_with_vlan:1;
+ u64 final_icm_addr;
+ u32 flow_tag;
+ u32 ctr_id;
+ u16 gvmi;
+ u16 hit_gvmi;
+ struct {
+ u32 id;
+ u32 size;
+ u8 param_0;
+ u8 param_1;
+ } reformat;
+ struct {
+ int count;
+ u32 headers[MLX5DR_MAX_VLANS];
+ } vlans;
+
+ struct {
+ u32 obj_id;
+ u32 offset;
+ u8 dest_reg_id;
+ u8 init_color;
+ } aso_flow_meter;
+};
+
+void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *last_ste,
+ struct mlx5dr_ste_actions_attr *attr,
+ u32 *added_stes);
+
+void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 hw_field,
+ u8 shifter,
+ u8 length,
+ u32 data);
+void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx,
+ __be64 *hw_action,
+ u8 dst_hw_field,
+ u8 dst_shifter,
+ u8 dst_len,
+ u8 src_hw_field,
+ u8 src_shifter);
+int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
+ void *data,
+ u32 data_sz,
+ u8 *hw_action,
+ u32 hw_action_sz,
+ u16 *used_hw_action_num);
+
+const struct mlx5dr_ste_action_modify_field *
+mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field);
+
+struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version);
+void mlx5dr_ste_free(struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ ste->refcount--;
+ if (!ste->refcount)
+ mlx5dr_ste_free(ste, matcher, nic_matcher);
+}
+
+/* initialized to 0, increased only when the ste appears in a new rule */
+static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste)
+{
+ ste->refcount++;
+}
+
+static inline bool mlx5dr_ste_is_not_used(struct mlx5dr_ste *ste)
+{
+ return !ste->refcount;
+}
+
+bool mlx5dr_ste_equal_tag(void *src, void *dst);
+int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ u8 *cur_hw_ste,
+ enum mlx5dr_icm_chunk_size log_table_size);
+
+/* STE build functions */
+int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+ u8 match_criteria,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_match_param *value);
+int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_match_param *value,
+ u8 *ste_arr);
+void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *builder,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn,
+ bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+ struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
+
+/* Actions utils */
+int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_action *actions[],
+ u32 num_actions,
+ u8 *ste_arr,
+ u32 *new_hw_ste_arr_sz);
+
+struct mlx5dr_match_spec {
+ u32 smac_47_16; /* Source MAC address of incoming packet */
+ /* Incoming packet Ethertype - this is the Ethertype
+ * following the last VLAN tag of the packet
+ */
+ u32 smac_15_0:16; /* Source MAC address of incoming packet */
+ u32 ethertype:16;
+
+ u32 dmac_47_16; /* Destination MAC address of incoming packet */
+
+ u32 dmac_15_0:16; /* Destination MAC address of incoming packet */
+ /* Priority of first VLAN tag in the incoming packet.
+ * Valid only when cvlan_tag==1 or svlan_tag==1
+ */
+ u32 first_prio:3;
+ /* CFI bit of first VLAN tag in the incoming packet.
+ * Valid only when cvlan_tag==1 or svlan_tag==1
+ */
+ u32 first_cfi:1;
+ /* VLAN ID of first VLAN tag in the incoming packet.
+ * Valid only when cvlan_tag==1 or svlan_tag==1
+ */
+ u32 first_vid:12;
+
+ u32 ip_protocol:8; /* IP protocol */
+ /* Differentiated Services Code Point derived from
+ * Traffic Class/TOS field of IPv6/v4
+ */
+ u32 ip_dscp:6;
+ /* Explicit Congestion Notification derived from
+ * Traffic Class/TOS field of IPv6/v4
+ */
+ u32 ip_ecn:2;
+ /* The first vlan in the packet is c-vlan (0x8100).
+ * cvlan_tag and svlan_tag cannot be set together
+ */
+ u32 cvlan_tag:1;
+ /* The first vlan in the packet is s-vlan (0x88a8).
+ * cvlan_tag and svlan_tag cannot be set together
+ */
+ u32 svlan_tag:1;
+ u32 frag:1; /* Packet is an IP fragment */
+ u32 ip_version:4; /* IP version */
+ /* TCP flags. Bit 0: FIN, Bit 1: SYN, Bit 2: RST, Bit 3: PSH,
+ * Bit 4: ACK, Bit 5: URG, Bit 6: ECE, Bit 7: CWR, Bit 8: NS
+ */
+ u32 tcp_flags:9;
+
+ /* TCP source port. TCP and UDP sport/dport are mutually exclusive */
+ u32 tcp_sport:16;
+ /* TCP destination port.
+ * tcp and udp sport/dport are mutually exclusive
+ */
+ u32 tcp_dport:16;
+
+ u32 reserved_auto1:16;
+ u32 ipv4_ihl:4;
+ u32 reserved_auto2:4;
+ u32 ttl_hoplimit:8;
+
+ /* UDP source port. TCP and UDP sport/dport are mutually exclusive */
+ u32 udp_sport:16;
+ /* UDP destination port. TCP and UDP sport/dport are mutually exclusive */
+ u32 udp_dport:16;
+
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_127_96;
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_95_64;
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_63_32;
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_31_0;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_127_96;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_95_64;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_63_32;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_31_0;
+};
+
+struct mlx5dr_match_misc {
+ /* used with GRE, checksum exists when gre_c_present == 1 */
+ u32 gre_c_present:1;
+ u32 reserved_auto1:1;
+ /* used with GRE, key exists when gre_k_present == 1 */
+ u32 gre_k_present:1;
+ /* used with GRE, sequence number exists when gre_s_present == 1 */
+ u32 gre_s_present:1;
+ u32 source_vhca_port:4;
+ u32 source_sqn:24; /* Source SQN */
+
+ u32 source_eswitch_owner_vhca_id:16;
+ /* Source port. 0xffff indicates the wire port */
+ u32 source_port:16;
+
+ /* Priority of second VLAN tag in the outer header of the incoming packet.
+ * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
+ */
+ u32 outer_second_prio:3;
+ /* CFI bit of the second VLAN tag in the outer header of the incoming packet.
+ * Valid only when outer_second_cvlan_tag == 1 or outer_second_svlan_tag == 1
+ */
+ u32 outer_second_cfi:1;
+ /* VLAN ID of the second VLAN tag in the outer header of the incoming packet.
+ * Valid only when outer_second_cvlan_tag == 1 or outer_second_svlan_tag == 1
+ */
+ u32 outer_second_vid:12;
+ /* Priority of second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
+ */
+ u32 inner_second_prio:3;
+ /* CFI bit of the second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag == 1 or inner_second_svlan_tag == 1
+ */
+ u32 inner_second_cfi:1;
+ /* VLAN ID of the second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag == 1 or inner_second_svlan_tag == 1
+ */
+ u32 inner_second_vid:12;
+
+ /* The second vlan in the outer header of the packet is c-vlan (0x8100).
+ * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
+ */
+ u32 outer_second_cvlan_tag:1;
+ /* The second vlan in the inner header of the packet is c-vlan (0x8100).
+ * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ */
+ u32 inner_second_cvlan_tag:1;
+ /* The second vlan in the outer header of the packet is s-vlan (0x88a8).
+ * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
+ */
+ u32 outer_second_svlan_tag:1;
+ /* The second vlan in the inner header of the packet is s-vlan (0x88a8).
+ * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ */
+ u32 inner_second_svlan_tag:1;
+ u32 reserved_auto2:12;
+
+ u32 gre_protocol:16; /* GRE Protocol (outer) */
+
+ u32 gre_key_h:24; /* GRE Key[31:8] (outer) */
+ u32 gre_key_l:8; /* GRE Key [7:0] (outer) */
+
+ u32 vxlan_vni:24; /* VXLAN VNI (outer) */
+ u32 reserved_auto3:8;
+
+ u32 geneve_vni:24; /* GENEVE VNI field (outer) */
+ u32 reserved_auto4:6;
+ u32 geneve_tlv_option_0_exist:1;
+ u32 geneve_oam:1; /* GENEVE OAM field (outer) */
+
+ u32 reserved_auto5:12;
+ u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */
+
+ u32 reserved_auto6:12;
+ u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */
+
+ u32 reserved_auto7:10;
+ u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */
+ u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */
+
+ u32 reserved_auto8:8;
+ u32 bth_dst_qp:24; /* Destination QP in BTH header */
+
+ u32 reserved_auto9;
+ u32 outer_esp_spi;
+ u32 reserved_auto10[3];
+};
+
+struct mlx5dr_match_misc2 {
+ u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */
+ u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */
+ u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */
+ u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */
+
+ u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */
+ u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */
+ u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */
+ u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */
+
+ u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */
+ u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */
+ u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */
+
+ u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */
+ u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */
+ u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */
+ u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */
+
+ u32 metadata_reg_c_7; /* metadata_reg_c_7 */
+ u32 metadata_reg_c_6; /* metadata_reg_c_6 */
+ u32 metadata_reg_c_5; /* metadata_reg_c_5 */
+ u32 metadata_reg_c_4; /* metadata_reg_c_4 */
+ u32 metadata_reg_c_3; /* metadata_reg_c_3 */
+ u32 metadata_reg_c_2; /* metadata_reg_c_2 */
+ u32 metadata_reg_c_1; /* metadata_reg_c_1 */
+ u32 metadata_reg_c_0; /* metadata_reg_c_0 */
+ u32 metadata_reg_a; /* metadata_reg_a */
+ u32 reserved_auto1[3];
+};
+
+struct mlx5dr_match_misc3 {
+ u32 inner_tcp_seq_num;
+ u32 outer_tcp_seq_num;
+ u32 inner_tcp_ack_num;
+ u32 outer_tcp_ack_num;
+
+ u32 reserved_auto1:8;
+ u32 outer_vxlan_gpe_vni:24;
+
+ u32 outer_vxlan_gpe_next_protocol:8;
+ u32 outer_vxlan_gpe_flags:8;
+ u32 reserved_auto2:16;
+
+ u32 icmpv4_header_data;
+ u32 icmpv6_header_data;
+
+ u8 icmpv4_type;
+ u8 icmpv4_code;
+ u8 icmpv6_type;
+ u8 icmpv6_code;
+
+ u32 geneve_tlv_option_0_data;
+
+ u32 gtpu_teid;
+
+ u8 gtpu_msg_type;
+ u8 gtpu_msg_flags;
+ u32 reserved_auto3:16;
+
+ u32 gtpu_dw_2;
+ u32 gtpu_first_ext_dw_0;
+ u32 gtpu_dw_0;
+ u32 reserved_auto4;
+};
+
+struct mlx5dr_match_misc4 {
+ u32 prog_sample_field_value_0;
+ u32 prog_sample_field_id_0;
+ u32 prog_sample_field_value_1;
+ u32 prog_sample_field_id_1;
+ u32 prog_sample_field_value_2;
+ u32 prog_sample_field_id_2;
+ u32 prog_sample_field_value_3;
+ u32 prog_sample_field_id_3;
+ u32 reserved_auto1[8];
+};
+
+struct mlx5dr_match_misc5 {
+ u32 macsec_tag_0;
+ u32 macsec_tag_1;
+ u32 macsec_tag_2;
+ u32 macsec_tag_3;
+ u32 tunnel_header_0;
+ u32 tunnel_header_1;
+ u32 tunnel_header_2;
+ u32 tunnel_header_3;
+};
+
+struct mlx5dr_match_param {
+ struct mlx5dr_match_spec outer;
+ struct mlx5dr_match_misc misc;
+ struct mlx5dr_match_spec inner;
+ struct mlx5dr_match_misc2 misc2;
+ struct mlx5dr_match_misc3 misc3;
+ struct mlx5dr_match_misc4 misc4;
+ struct mlx5dr_match_misc5 misc5;
+};
+
+#define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \
+ (_misc3)->icmpv4_code || \
+ (_misc3)->icmpv4_header_data)
+
+#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \
+ (_spec)->src_ip_95_64 || \
+ (_spec)->src_ip_63_32 || \
+ (_spec)->src_ip_31_0)
+
+#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \
+ (_spec)->dst_ip_95_64 || \
+ (_spec)->dst_ip_63_32 || \
+ (_spec)->dst_ip_31_0)
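+
+/* Illustrative sketch only, not part of the upstream driver: the DR_MASK_IS_*
+ * helpers above are applied to one mlx5dr_match_spec of a mlx5dr_match_param.
+ * The helper name dr_example_mask_has_outer_ip() is hypothetical.
+ */
+static inline bool dr_example_mask_has_outer_ip(struct mlx5dr_match_param *mask)
+{
+	/* true when any outer source or destination IP bit is masked */
+	return DR_MASK_IS_SRC_IP_SET(&mask->outer) ||
+	       DR_MASK_IS_DST_IP_SET(&mask->outer);
+}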
+
+struct mlx5dr_esw_caps {
+ u64 drop_icm_address_rx;
+ u64 drop_icm_address_tx;
+ u64 uplink_icm_address_rx;
+ u64 uplink_icm_address_tx;
+ u8 sw_owner:1;
+ u8 sw_owner_v2:1;
+};
+
+struct mlx5dr_cmd_vport_cap {
+ u16 vport_gvmi;
+ u16 vhca_gvmi;
+ u16 num;
+ u64 icm_address_rx;
+ u64 icm_address_tx;
+};
+
+struct mlx5dr_roce_cap {
+ u8 roce_en:1;
+ u8 fl_rc_qp_when_roce_disabled:1;
+ u8 fl_rc_qp_when_roce_enabled:1;
+};
+
+struct mlx5dr_vports {
+ struct mlx5dr_cmd_vport_cap esw_manager_caps;
+ struct mlx5dr_cmd_vport_cap uplink_caps;
+ struct xarray vports_caps_xa;
+};
+
+struct mlx5dr_cmd_caps {
+ u16 gvmi;
+ u64 nic_rx_drop_address;
+ u64 nic_tx_drop_address;
+ u64 nic_tx_allow_address;
+ u64 esw_rx_drop_address;
+ u64 esw_tx_drop_address;
+ u32 log_icm_size;
+ u64 hdr_modify_icm_addr;
+ u32 flex_protocols;
+ u8 flex_parser_id_icmp_dw0;
+ u8 flex_parser_id_icmp_dw1;
+ u8 flex_parser_id_icmpv6_dw0;
+ u8 flex_parser_id_icmpv6_dw1;
+ u8 flex_parser_id_geneve_tlv_option_0;
+ u8 flex_parser_id_mpls_over_gre;
+ u8 flex_parser_id_mpls_over_udp;
+ u8 flex_parser_id_gtpu_dw_0;
+ u8 flex_parser_id_gtpu_teid;
+ u8 flex_parser_id_gtpu_dw_2;
+ u8 flex_parser_id_gtpu_first_ext_dw_0;
+ u8 flex_parser_ok_bits_supp;
+ u8 max_ft_level;
+ u16 roce_min_src_udp;
+ u8 sw_format_ver;
+ bool eswitch_manager;
+ bool rx_sw_owner;
+ bool tx_sw_owner;
+ bool fdb_sw_owner;
+ u8 rx_sw_owner_v2:1;
+ u8 tx_sw_owner_v2:1;
+ u8 fdb_sw_owner_v2:1;
+ struct mlx5dr_esw_caps esw_caps;
+ struct mlx5dr_vports vports;
+ bool prio_tag_required;
+ struct mlx5dr_roce_cap roce_caps;
+ u8 is_ecpf:1;
+ u8 isolate_vl_tc:1;
+};
+
+enum mlx5dr_domain_nic_type {
+ DR_DOMAIN_NIC_TYPE_RX,
+ DR_DOMAIN_NIC_TYPE_TX,
+};
+
+struct mlx5dr_domain_rx_tx {
+ u64 drop_icm_addr;
+ u64 default_icm_addr;
+ enum mlx5dr_domain_nic_type type;
+ struct mutex mutex; /* protect rx/tx domain */
+};
+
+struct mlx5dr_domain_info {
+ bool supp_sw_steering;
+ u32 max_inline_size;
+ u32 max_send_wr;
+ u32 max_log_sw_icm_sz;
+ u32 max_log_action_icm_sz;
+ struct mlx5dr_domain_rx_tx rx;
+ struct mlx5dr_domain_rx_tx tx;
+ struct mlx5dr_cmd_caps caps;
+};
+
+struct mlx5dr_domain {
+ struct mlx5dr_domain *peer_dmn;
+ struct mlx5_core_dev *mdev;
+ u32 pdn;
+ struct mlx5_uars_page *uar;
+ enum mlx5dr_domain_type type;
+ refcount_t refcount;
+ struct mlx5dr_icm_pool *ste_icm_pool;
+ struct mlx5dr_icm_pool *action_icm_pool;
+ struct mlx5dr_send_ring *send_ring;
+ struct mlx5dr_domain_info info;
+ struct xarray csum_fts_xa;
+ struct mlx5dr_ste_ctx *ste_ctx;
+ struct list_head dbg_tbl_list;
+ struct mlx5dr_dbg_dump_info dump_info;
+};
+
+struct mlx5dr_table_rx_tx {
+ struct mlx5dr_ste_htbl *s_anchor;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ u64 default_icm_addr;
+ struct list_head nic_matcher_list;
+};
+
+struct mlx5dr_table {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_table_rx_tx rx;
+ struct mlx5dr_table_rx_tx tx;
+ u32 level;
+ u32 table_type;
+ u32 table_id;
+ u32 flags;
+ struct list_head matcher_list;
+ struct mlx5dr_action *miss_action;
+ refcount_t refcount;
+ struct list_head dbg_node;
+};
+
+struct mlx5dr_matcher_rx_tx {
+ struct mlx5dr_ste_htbl *s_htbl;
+ struct mlx5dr_ste_htbl *e_anchor;
+ struct mlx5dr_ste_build *ste_builder;
+ struct mlx5dr_ste_build ste_builder_arr[DR_RULE_IPV_MAX]
+ [DR_RULE_IPV_MAX]
+ [DR_RULE_MAX_STES];
+ u8 num_of_builders;
+ u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX];
+ u64 default_icm_addr;
+ struct mlx5dr_table_rx_tx *nic_tbl;
+ u32 prio;
+ struct list_head list_node;
+ u32 rules;
+};
+
+struct mlx5dr_matcher {
+ struct mlx5dr_table *tbl;
+ struct mlx5dr_matcher_rx_tx rx;
+ struct mlx5dr_matcher_rx_tx tx;
+ struct list_head list_node; /* Used for both matchers and dbg managing */
+ u32 prio;
+ struct mlx5dr_match_param mask;
+ u8 match_criteria;
+ refcount_t refcount;
+ struct list_head dbg_rule_list;
+};
+
+struct mlx5dr_ste_action_modify_field {
+ u16 hw_field;
+ u8 start;
+ u8 end;
+ u8 l3_type;
+ u8 l4_type;
+};
+
+struct mlx5dr_action_rewrite {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_icm_chunk *chunk;
+ u8 *data;
+ u16 num_of_actions;
+ u32 index;
+ u8 allow_rx:1;
+ u8 allow_tx:1;
+ u8 modify_ttl:1;
+};
+
+struct mlx5dr_action_reformat {
+ struct mlx5dr_domain *dmn;
+ u32 id;
+ u32 size;
+ u8 param_0;
+ u8 param_1;
+};
+
+struct mlx5dr_action_sampler {
+ struct mlx5dr_domain *dmn;
+ u64 rx_icm_addr;
+ u64 tx_icm_addr;
+ u32 sampler_id;
+};
+
+struct mlx5dr_action_dest_tbl {
+ u8 is_fw_tbl:1;
+ union {
+ struct mlx5dr_table *tbl;
+ struct {
+ struct mlx5dr_domain *dmn;
+ u32 id;
+ u32 group_id;
+ enum fs_flow_table_type type;
+ u64 rx_icm_addr;
+ u64 tx_icm_addr;
+ struct mlx5dr_action **ref_actions;
+ u32 num_of_ref_actions;
+ } fw_tbl;
+ };
+};
+
+struct mlx5dr_action_ctr {
+ u32 ctr_id;
+ u32 offset;
+};
+
+struct mlx5dr_action_vport {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_cmd_vport_cap *caps;
+};
+
+struct mlx5dr_action_push_vlan {
+ u32 vlan_hdr; /* tpid_pcp_dei_vid */
+};
+
+struct mlx5dr_action_flow_tag {
+ u32 flow_tag;
+};
+
+struct mlx5dr_rule_action_member {
+ struct mlx5dr_action *action;
+ struct list_head list;
+};
+
+struct mlx5dr_action_aso_flow_meter {
+ struct mlx5dr_domain *dmn;
+ u32 obj_id;
+ u32 offset;
+ u8 dest_reg_id;
+ u8 init_color;
+};
+
+struct mlx5dr_action {
+ enum mlx5dr_action_type action_type;
+ refcount_t refcount;
+
+ union {
+ void *data;
+ struct mlx5dr_action_rewrite *rewrite;
+ struct mlx5dr_action_reformat *reformat;
+ struct mlx5dr_action_sampler *sampler;
+ struct mlx5dr_action_dest_tbl *dest_tbl;
+ struct mlx5dr_action_ctr *ctr;
+ struct mlx5dr_action_vport *vport;
+ struct mlx5dr_action_push_vlan *push_vlan;
+ struct mlx5dr_action_flow_tag *flow_tag;
+ struct mlx5dr_action_aso_flow_meter *aso;
+ };
+};
+
+enum mlx5dr_connect_type {
+ CONNECT_HIT = 1,
+ CONNECT_MISS = 2,
+};
+
+struct mlx5dr_htbl_connect_info {
+ enum mlx5dr_connect_type type;
+ union {
+ struct mlx5dr_ste_htbl *hit_next_htbl;
+ u64 miss_icm_addr;
+ };
+};
+
+struct mlx5dr_rule_rx_tx {
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_ste *last_rule_ste;
+};
+
+struct mlx5dr_rule {
+ struct mlx5dr_matcher *matcher;
+ struct mlx5dr_rule_rx_tx rx;
+ struct mlx5dr_rule_rx_tx tx;
+ struct list_head rule_actions_list;
+ struct list_head dbg_node;
+ u32 flow_source;
+};
+
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste *ste,
+ bool force);
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+ struct mlx5dr_ste *curr_ste,
+ int *num_of_stes);
+
+struct mlx5dr_icm_chunk {
+ struct mlx5dr_icm_buddy_mem *buddy_mem;
+ struct list_head chunk_list;
+
+ /* indicates the index of this chunk in the whole memory,
+ * used for deleting the chunk from the buddy
+ */
+ unsigned int seg;
+ enum mlx5dr_icm_chunk_size size;
+
+ /* Memory optimisation */
+ struct mlx5dr_ste *ste_arr;
+ u8 *hw_ste_arr;
+ struct list_head *miss_list;
+};
+
+static inline void mlx5dr_domain_nic_lock(struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+ mutex_lock(&nic_dmn->mutex);
+}
+
+static inline void mlx5dr_domain_nic_unlock(struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+ mutex_unlock(&nic_dmn->mutex);
+}
+
+static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn)
+{
+ mlx5dr_domain_nic_lock(&dmn->info.rx);
+ mlx5dr_domain_nic_lock(&dmn->info.tx);
+}
+
+static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn)
+{
+ mlx5dr_domain_nic_unlock(&dmn->info.tx);
+ mlx5dr_domain_nic_unlock(&dmn->info.rx);
+}
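+
+/* Illustrative sketch only, not part of the upstream driver: running a
+ * callback under the full domain lock. The helpers above take the RX lock
+ * before the TX lock and release them in reverse order, so every caller
+ * follows the same ordering. The callback is a placeholder.
+ */
+static inline void dr_example_with_domain_locked(struct mlx5dr_domain *dmn,
+						  void (*fn)(struct mlx5dr_domain *dmn))
+{
+	mlx5dr_domain_lock(dmn);	/* locks info.rx, then info.tx */
+	fn(dmn);
+	mlx5dr_domain_unlock(dmn);	/* unlocks info.tx, then info.rx */
+}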
+
+int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+
+int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ enum mlx5dr_ipv outer_ipv,
+ enum mlx5dr_ipv inner_ipv);
+
+u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk);
+u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk);
+u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk);
+u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste);
+
+static inline int
+mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type)
+{
+ if (icm_type == DR_ICM_TYPE_STE)
+ return DR_STE_SIZE;
+
+ return DR_MODIFY_ACTION_SIZE;
+}
+
+static inline u32
+mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size)
+{
+ return 1 << chunk_size;
+}
+
+static inline int
+mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size,
+ enum mlx5dr_icm_type icm_type)
+{
+ int num_of_entries;
+ int entry_size;
+
+ entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(icm_type);
+ num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
+
+ return entry_size * num_of_entries;
+}
+
+static inline int
+mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl)
+{
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk->size);
+
+ /* Threshold is 50%; the extra 1 gives a table of size 1 a non-zero threshold */
+ return (num_of_entries + 1) / 2;
+}
+
+static inline bool
+mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl)
+{
+ if (htbl->chunk->size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
+ return false;
+
+ return true;
+}
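+
+/* Illustrative sketch only, not part of the upstream driver: the two helpers
+ * above combine into a grow decision of the following shape. The helper name
+ * and the num_of_valid_entries parameter are hypothetical; the real rehash
+ * logic lives elsewhere in the driver.
+ */
+static inline bool dr_example_htbl_should_grow(struct mlx5dr_ste_htbl *htbl,
+					       u32 num_of_valid_entries)
+{
+	if (!mlx5dr_ste_htbl_may_grow(htbl))
+		return false;
+
+	/* grow once the table crosses the 50% occupancy threshold */
+	return num_of_valid_entries >= mlx5dr_ste_htbl_increase_threshold(htbl);
+}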
+
+struct mlx5dr_cmd_vport_cap *
+mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport);
+
+struct mlx5dr_cmd_query_flow_table_details {
+ u8 status;
+ u8 level;
+ u64 sw_owner_icm_root_1;
+ u64 sw_owner_icm_root_0;
+};
+
+struct mlx5dr_cmd_create_flow_table_attr {
+ u32 table_type;
+ u16 uid;
+ u64 icm_addr_rx;
+ u64 icm_addr_tx;
+ u8 level;
+ bool sw_owner;
+ bool term_tbl;
+ bool decap_en;
+ bool reformat_en;
+};
+
+/* internal API functions */
+int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+ struct mlx5dr_cmd_caps *caps);
+int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
+ bool other_vport, u16 vport_number,
+ u64 *icm_address_rx,
+ u64 *icm_address_tx);
+int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev,
+ bool other_vport, u16 vport_number, u16 *gvmi);
+int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
+ struct mlx5dr_esw_caps *caps);
+int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev,
+ u32 sampler_id,
+ u64 *rx_icm_addr,
+ u64 *tx_icm_addr);
+int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev);
+int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id,
+ u32 modify_header_id,
+ u16 vport_id);
+int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id);
+int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u8 num_of_actions,
+ u64 *actions,
+ u32 *modify_header_id);
+int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 modify_header_id);
+int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 *group_id);
+int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id);
+int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
+ struct mlx5dr_cmd_create_flow_table_attr *attr,
+ u64 *fdb_rx_icm_addr,
+ u32 *table_id);
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+ u32 table_id,
+ u32 table_type);
+int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
+ enum fs_flow_table_type type,
+ u32 table_id,
+ struct mlx5dr_cmd_query_flow_table_details *output);
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+ enum mlx5_reformat_ctx_type rt,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
+ size_t reformat_size,
+ void *reformat_data,
+ u32 *reformat_id);
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+ u32 reformat_id);
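+
+/* Illustrative sketch only, not part of the upstream driver: filling
+ * mlx5dr_cmd_create_flow_table_attr for an SW-owned table and issuing the
+ * command declared above. The helper name and its parameters are
+ * hypothetical placeholders for caller-supplied values.
+ */
+static inline int dr_example_create_sw_owned_tbl(struct mlx5_core_dev *mdev,
+						  u32 table_type, u8 level,
+						  u64 icm_addr_rx, u64 icm_addr_tx,
+						  u32 *table_id)
+{
+	struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
+	u64 fdb_rx_icm_addr = 0;
+
+	ft_attr.table_type = table_type;
+	ft_attr.level = level;
+	ft_attr.sw_owner = true;	/* table is managed by SW steering */
+	ft_attr.icm_addr_rx = icm_addr_rx;
+	ft_attr.icm_addr_tx = icm_addr_tx;
+
+	return mlx5dr_cmd_create_flow_table(mdev, &ft_attr,
+					    &fdb_rx_icm_addr, table_id);
+}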
+
+struct mlx5dr_cmd_gid_attr {
+ u8 gid[16];
+ u8 mac[6];
+ u32 roce_ver;
+};
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+ u16 index, struct mlx5dr_cmd_gid_attr *attr);
+
+struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
+ enum mlx5dr_icm_type icm_type);
+void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool);
+
+struct mlx5dr_icm_chunk *
+mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size);
+void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk);
+
+void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx,
+ u8 *hw_ste_p, u32 ste_size);
+int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain_rx_tx *nic_dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ struct mlx5dr_htbl_connect_info *connect_info,
+ bool update_hw_ste);
+void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
+ u16 gvmi,
+ enum mlx5dr_domain_nic_type nic_type,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste,
+ struct mlx5dr_htbl_connect_info *connect_info);
+void mlx5dr_ste_copy_param(u8 match_criteria,
+ struct mlx5dr_match_param *set_param,
+ struct mlx5dr_match_parameters *mask,
+ bool clear);
+
+struct mlx5dr_qp {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_qp wq;
+ struct mlx5_uars_page *uar;
+ struct mlx5_wq_ctrl wq_ctrl;
+ u32 qpn;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ unsigned int *wqe_head;
+ unsigned int wqe_cnt;
+ } sq;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ unsigned int wqe_cnt;
+ } rq;
+ int max_inline_data;
+};
+
+struct mlx5dr_cq {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_cqwq wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_cq mcq;
+ struct mlx5dr_qp *qp;
+};
+
+struct mlx5dr_mr {
+ struct mlx5_core_dev *mdev;
+ u32 mkey;
+ dma_addr_t dma_addr;
+ void *addr;
+ size_t size;
+};
+
+#define MAX_SEND_CQE 64
+#define MIN_READ_SYNC 64
+
+struct mlx5dr_send_ring {
+ struct mlx5dr_cq *cq;
+ struct mlx5dr_qp *qp;
+ struct mlx5dr_mr *mr;
+ /* How many WQEs are waiting for completion */
+ u32 pending_wqe;
+ /* A completion signal is requested once per this threshold of posted WQEs */
+ u16 signal_th;
+ /* Each post_send size is limited to max_post_send_size */
+ u32 max_post_send_size;
+ /* manage the send queue */
+ u32 tx_head;
+ void *buf;
+ u32 buf_size;
+ u8 sync_buff[MIN_READ_SYNC];
+ struct mlx5dr_mr *sync_mr;
+ spinlock_t lock; /* Protect the data path of the send ring */
+ bool err_state; /* send_ring is not usable in err state */
+};
+
+int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
+void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring);
+int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn);
+int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste *ste,
+ u8 *data,
+ u16 size,
+ u16 offset);
+int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste, u8 *mask);
+int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *ste_init_data,
+ bool update_hw_ste);
+int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *action);
+
+struct mlx5dr_cmd_ft_info {
+ u32 id;
+ u16 vport;
+ enum fs_flow_table_type type;
+};
+
+struct mlx5dr_cmd_flow_destination_hw_info {
+ enum mlx5_flow_destination_type type;
+ union {
+ u32 tir_num;
+ u32 ft_num;
+ u32 ft_id;
+ u32 counter_id;
+ u32 sampler_id;
+ struct {
+ u16 num;
+ u16 vhca_id;
+ u32 reformat_id;
+ u8 flags;
+ } vport;
+ };
+};
+
+struct mlx5dr_cmd_fte_info {
+ u32 dests_size;
+ u32 index;
+ struct mlx5_flow_context flow_context;
+ u32 *val;
+ struct mlx5_flow_act action;
+ struct mlx5dr_cmd_flow_destination_hw_info *dest_arr;
+ bool ignore_flow_level;
+};
+
+int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
+ int opmod, int modify_mask,
+ struct mlx5dr_cmd_ft_info *ft,
+ u32 group_id,
+ struct mlx5dr_cmd_fte_info *fte);
+
+bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps);
+
+struct mlx5dr_fw_recalc_cs_ft {
+ u64 rx_icm_addr;
+ u32 table_id;
+ u32 group_id;
+ u32 modify_hdr_id;
+};
+
+struct mlx5dr_fw_recalc_cs_ft *
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num);
+void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft);
+int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+ u16 vport_num,
+ u64 *rx_icm_addr);
+int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_cmd_flow_destination_hw_info *dest,
+ int num_dest,
+ bool reformat_req,
+ u32 *tbl_id,
+ u32 *group_id,
+ bool ignore_flow_level,
+ u32 flow_source);
+void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
+ u32 group_id);
+#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
new file mode 100644
index 000000000..13b6d4721
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -0,0 +1,820 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "mlx5dr.h"
+#include "fs_dr.h"
+
+static bool mlx5_dr_is_fw_table(u32 flags)
+{
+ if (flags & MLX5_FLOW_TABLE_TERMINATION)
+ return true;
+
+ return false;
+}
+
+static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn,
+ disconnect);
+}
+
+static int set_miss_action(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5dr_action *old_miss_action;
+ struct mlx5dr_action *action = NULL;
+ struct mlx5dr_table *next_tbl;
+ int err;
+
+ next_tbl = next_ft ? next_ft->fs_dr_table.dr_table : NULL;
+ if (next_tbl) {
+ action = mlx5dr_action_create_dest_table(next_tbl);
+ if (!action)
+ return -EINVAL;
+ }
+ old_miss_action = ft->fs_dr_table.miss_action;
+ err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action);
+ if (err && action) {
+ err = mlx5dr_action_destroy(action);
+ if (err)
+ mlx5_core_err(ns->dev,
+ "Failed to destroy action (%d)\n", err);
+ action = NULL;
+ }
+ ft->fs_dr_table.miss_action = action;
+ if (old_miss_action) {
+ err = mlx5dr_action_destroy(old_miss_action);
+ if (err)
+ mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
+ err);
+ }
+
+ return err;
+}
+
+static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table_attr *ft_attr,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5dr_table *tbl;
+ u32 flags;
+ int err;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
+ ft_attr,
+ next_ft);
+ flags = ft->flags;
+ /* turn off encap/decap if the FW does not support them for SW-steering tables */
+ if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported))
+ flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags,
+ ft_attr->uid);
+ if (!tbl) {
+ mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
+ return -EINVAL;
+ }
+
+ ft->fs_dr_table.dr_table = tbl;
+ ft->id = mlx5dr_table_get_id(tbl);
+
+ if (next_ft) {
+ err = set_miss_action(ns, ft, next_ft);
+ if (err) {
+ mlx5dr_table_destroy(tbl);
+ ft->fs_dr_table.dr_table = NULL;
+ return err;
+ }
+ }
+
+ ft->max_fte = INT_MAX;
+
+ return 0;
+}
+
+static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5dr_action *action = ft->fs_dr_table.miss_action;
+ int err;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft);
+
+ err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table);
+ if (err) {
+ mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n",
+ err);
+ return err;
+ }
+ if (action) {
+ err = mlx5dr_action_destroy(action);
+ if (err) {
+ mlx5_core_err(ns->dev, "Failed to destroy action(%d)\n",
+ err);
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft);
+
+ return set_miss_action(ns, ft, next_ft);
+}
+
+static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ struct mlx5_flow_group *fg)
+{
+ struct mlx5dr_matcher *matcher;
+ u32 priority = MLX5_GET(create_flow_group_in, in,
+ start_flow_index);
+ u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+ in,
+ match_criteria_enable);
+ struct mlx5dr_match_parameters mask;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in,
+ fg);
+
+ mask.match_buf = MLX5_ADDR_OF(create_flow_group_in,
+ in, match_criteria);
+ mask.match_sz = sizeof(fg->mask.match_criteria);
+
+ matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table,
+ priority,
+ match_criteria_enable,
+ &mask);
+ if (!matcher) {
+ mlx5_core_err(ns->dev, "Failed creating matcher\n");
+ return -EINVAL;
+ }
+
+ fg->fs_dr_matcher.dr_matcher = matcher;
+ return 0;
+}
+
+static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg);
+
+ return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher);
+}
+
+static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain,
+ struct mlx5_flow_rule *dst)
+{
+ struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+
+ return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num,
+ dest_attr->vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID,
+ dest_attr->vport.vhca_id);
+}
+
+static struct mlx5dr_action *create_uplink_action(struct mlx5dr_domain *domain,
+ struct mlx5_flow_rule *dst)
+{
+ struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+
+ return mlx5dr_action_create_dest_vport(domain, MLX5_VPORT_UPLINK, 1,
+ dest_attr->vport.vhca_id);
+}
+
+static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain,
+ struct mlx5_flow_rule *dst)
+{
+ struct mlx5_flow_table *dest_ft = dst->dest_attr.ft;
+
+ if (mlx5_dr_is_fw_table(dest_ft->flags))
+ return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft);
+ return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table);
+}
+
+static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain,
+ struct mlx5_fs_vlan *vlan)
+{
+ u16 n_ethtype = vlan->ethtype;
+ u8 prio = vlan->prio;
+ u16 vid = vlan->vid;
+ u32 vlan_hdr;
+
+ vlan_hdr = (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid;
+ return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr));
+}
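+
+/* For example, with illustrative values ethtype 0x8100, prio 3 and vid 100,
+ * the computation in create_action_push_vlan() above yields
+ * vlan_hdr = 0x8100 << 16 | 3 << 12 | 100 = 0x81003064, which is converted
+ * to network byte order before being handed to
+ * mlx5dr_action_create_push_vlan().
+ */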
+
+static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst)
+{
+ return (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
+ dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
+ dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+}
+
+/* We want to support a rule with 32 destinations, which means we need to
+ * account for 32 destinations plus usually a counter plus one more action
+ * for a multi-destination flow table.
+ */
+#define MLX5_FLOW_CONTEXT_ACTION_MAX 34
+static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain;
+ struct mlx5dr_action_dest *term_actions;
+ struct mlx5dr_match_parameters params;
+ struct mlx5_core_dev *dev = ns->dev;
+ struct mlx5dr_action **fs_dr_actions;
+ struct mlx5dr_action *tmp_action;
+ struct mlx5dr_action **actions;
+ bool delay_encap_set = false;
+ struct mlx5dr_rule *rule;
+ struct mlx5_flow_rule *dst;
+ int fs_dr_num_actions = 0;
+ int num_term_actions = 0;
+ int num_actions = 0;
+ size_t match_sz;
+ int err = 0;
+ int i;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte);
+
+ actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions),
+ GFP_KERNEL);
+ if (!actions) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
+ sizeof(*fs_dr_actions), GFP_KERNEL);
+ if (!fs_dr_actions) {
+ err = -ENOMEM;
+ goto free_actions_alloc;
+ }
+
+ term_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
+ sizeof(*term_actions), GFP_KERNEL);
+ if (!term_actions) {
+ err = -ENOMEM;
+ goto free_fs_dr_actions_alloc;
+ }
+
+ match_sz = sizeof(fte->val);
+
+ /* Drop the reformat action bit if a destination vport is set with reformat */
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (!contain_vport_reformat_action(dst))
+ continue;
+
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ break;
+ }
+ }
+
+ /* The order of the actions must be kept; only the following
+ * order is supported by SW steering:
+ * TX: modify header -> push vlan -> encap
+ * RX: decap -> pop vlan -> modify header
+ */
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ enum mlx5dr_action_reformat_type decap_type =
+ DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2;
+
+ tmp_action = mlx5dr_action_create_packet_reformat(domain,
+ decap_type,
+ 0, 0, 0,
+ NULL);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
+ bool is_decap = fte->action.pkt_reformat->reformat_type ==
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+
+ if (is_decap)
+ actions[num_actions++] =
+ fte->action.pkt_reformat->action.dr_action;
+ else
+ delay_encap_set = true;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+ tmp_action =
+ mlx5dr_action_create_pop_vlan();
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) {
+ tmp_action =
+ mlx5dr_action_create_pop_vlan();
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ actions[num_actions++] =
+ fte->action.modify_hdr->action.dr_action;
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (delay_encap_set)
+ actions[num_actions++] =
+ fte->action.pkt_reformat->action.dr_action;
+
+ /* The order of the actions below is not important */
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ tmp_action = mlx5dr_action_create_drop();
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
+ }
+
+ if (fte->flow_context.flow_tag) {
+ tmp_action =
+ mlx5dr_action_create_tag(fte->flow_context.flow_tag);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ enum mlx5_flow_destination_type type = dst->dest_attr.type;
+ u32 id;
+
+ if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ tmp_action = create_ft_action(domain, dst);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ tmp_action = type == MLX5_FLOW_DESTINATION_TYPE_VPORT ?
+ create_vport_action(domain, dst) :
+ create_uplink_action(domain, dst);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions].dest = tmp_action;
+
+ if (dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ term_actions[num_term_actions].reformat =
+ dst->dest_attr.vport.pkt_reformat->action.dr_action;
+
+ num_term_actions++;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ id = dst->dest_attr.ft_num;
+ tmp_action = mlx5dr_action_create_dest_table_num(domain,
+ id);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ id = dst->dest_attr.sampler_id;
+ tmp_action = mlx5dr_action_create_flow_sampler(domain,
+ id);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ }
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ u32 id;
+
+ if (dst->dest_attr.type !=
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ id = dst->dest_attr.counter_id;
+ tmp_action =
+ mlx5dr_action_create_flow_counter(id);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ if (fte->action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ tmp_action =
+ mlx5dr_action_create_aso(domain,
+ fte->action.exe_aso.object_id,
+ fte->action.exe_aso.return_reg_id,
+ fte->action.exe_aso.type,
+ fte->action.exe_aso.flow_meter.init_color,
+ fte->action.exe_aso.flow_meter.meter_idx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ params.match_sz = match_sz;
+ params.match_buf = (u64 *)fte->val;
+ if (num_term_actions == 1) {
+ if (term_actions->reformat) {
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ actions[num_actions++] = term_actions->reformat;
+ }
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ actions[num_actions++] = term_actions->dest;
+ } else if (num_term_actions > 1) {
+ bool ignore_flow_level =
+ !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+ u32 flow_source = fte->flow_context.flow_source;
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
+ term_actions,
+ num_term_actions,
+ ignore_flow_level,
+ flow_source);
+ if (!tmp_action) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher,
+ &params,
+ num_actions,
+ actions,
+ fte->flow_context.flow_source);
+ if (!rule) {
+ err = -EINVAL;
+ goto free_actions;
+ }
+
+ kfree(term_actions);
+ kfree(actions);
+
+ fte->fs_dr_rule.dr_rule = rule;
+ fte->fs_dr_rule.num_actions = fs_dr_num_actions;
+ fte->fs_dr_rule.dr_actions = fs_dr_actions;
+
+ return 0;
+
+free_actions:
+ /* Free in reverse order to handle action dependencies */
+ for (i = fs_dr_num_actions - 1; i >= 0; i--)
+ if (!IS_ERR_OR_NULL(fs_dr_actions[i]))
+ mlx5dr_action_destroy(fs_dr_actions[i]);
+
+ kfree(term_actions);
+free_fs_dr_actions_alloc:
+ kfree(fs_dr_actions);
+free_actions_alloc:
+ kfree(actions);
+out_err:
+ mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err);
+ return err;
+}
+
+static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat_params *params,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain;
+ struct mlx5dr_action *action;
+ int dr_reformat;
+
+ switch (params->type) {
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+ case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2;
+ break;
+ case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2;
+ break;
+ case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3;
+ break;
+ case MLX5_REFORMAT_TYPE_INSERT_HDR:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_INSERT_HDR;
+ break;
+ case MLX5_REFORMAT_TYPE_REMOVE_HDR:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_REMOVE_HDR;
+ break;
+ default:
+ mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
+ params->type);
+ return -EOPNOTSUPP;
+ }
+
+ action = mlx5dr_action_create_packet_reformat(dr_domain,
+ dr_reformat,
+ params->param_0,
+ params->param_1,
+ params->size,
+ params->data);
+ if (!action) {
+ mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n");
+ return -EINVAL;
+ }
+
+ pkt_reformat->action.dr_action = action;
+
+ return 0;
+}
+
+static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ mlx5dr_action_destroy(pkt_reformat->action.dr_action);
+}
+
+static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain;
+ struct mlx5dr_action *action;
+ size_t actions_sz;
+
+ actions_sz = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) *
+ num_actions;
+ action = mlx5dr_action_create_modify_header(dr_domain, 0,
+ actions_sz,
+ modify_actions);
+ if (!action) {
+ mlx5_core_err(ns->dev, "Failed allocating modify-header action\n");
+ return -EINVAL;
+ }
+
+ modify_hdr->action.dr_action = action;
+
+ return 0;
+}
+
+static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ mlx5dr_action_destroy(modify_hdr->action.dr_action);
+}
+
+static int
+mlx5_cmd_dr_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+ int definer_id)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_dr_create_match_definer(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule;
+ int err;
+ int i;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte);
+
+ err = mlx5dr_rule_destroy(rule->dr_rule);
+ if (err)
+ return err;
+
+ /* Free in reverse order to handle action dependencies */
+ for (i = rule->num_actions - 1; i >= 0; i--)
+ if (!IS_ERR_OR_NULL(rule->dr_actions[i]))
+ mlx5dr_action_destroy(rule->dr_actions[i]);
+
+ kfree(rule->dr_actions);
+ return 0;
+}
+
+static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ struct fs_fte fte_tmp = {};
+ int ret;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte);
+
+ /* Backup current dr rule details */
+ fte_tmp.fs_dr_rule = fte->fs_dr_rule;
+ memset(&fte->fs_dr_rule, 0, sizeof(struct mlx5_fs_dr_rule));
+
+ /* First add the new updated rule, then delete the old rule */
+ ret = mlx5_cmd_dr_create_fte(ns, ft, group, fte);
+ if (ret)
+ goto restore_fte;
+
+ ret = mlx5_cmd_dr_delete_fte(ns, ft, &fte_tmp);
+ WARN_ONCE(ret, "dr update fte duplicate rule deletion failed\n");
+ return ret;
+
+restore_fte:
+ fte->fs_dr_rule = fte_tmp.fs_dr_rule;
+ return ret;
+}
+
+static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns)
+{
+ struct mlx5dr_domain *peer_domain = NULL;
+
+ if (peer_ns)
+ peer_domain = peer_ns->fs_dr_domain.dr_domain;
+ mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain,
+ peer_domain);
+ return 0;
+}
+
+static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns)
+{
+ ns->fs_dr_domain.dr_domain =
+ mlx5dr_domain_create(ns->dev,
+ MLX5DR_DOMAIN_TYPE_FDB);
+ if (!ns->fs_dr_domain.dr_domain) {
+ mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
+{
+ return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain);
+}
+
+static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ if (ft_type != FS_FT_FDB ||
+ MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5)
+ return 0;
+
+ return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
+}
+
+bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
+{
+ return mlx5dr_is_supported(dev);
+}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
+ .create_flow_table = mlx5_cmd_dr_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_dr_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_dr_modify_flow_table,
+ .create_flow_group = mlx5_cmd_dr_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_dr_destroy_flow_group,
+ .create_fte = mlx5_cmd_dr_create_fte,
+ .update_fte = mlx5_cmd_dr_update_fte,
+ .delete_fte = mlx5_cmd_dr_delete_fte,
+ .update_root_ft = mlx5_cmd_dr_update_root_ft,
+ .packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc,
+ .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc,
+ .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc,
+ .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc,
+ .create_match_definer = mlx5_cmd_dr_create_match_definer,
+ .destroy_match_definer = mlx5_cmd_dr_destroy_match_definer,
+ .set_peer = mlx5_cmd_dr_set_peer,
+ .create_ns = mlx5_cmd_dr_create_ns,
+ .destroy_ns = mlx5_cmd_dr_destroy_ns,
+ .get_capabilities = mlx5_cmd_dr_get_capabilities,
+};
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
+{
+ return &mlx5_flow_cmds_dr;
+}
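+
+/* Illustrative sketch only, not part of this file: choosing between the
+ * SW-steering command set above and the FW command set. The helper name is
+ * hypothetical; the core flow-steering code makes a similar choice based on
+ * the configured steering mode.
+ */
+static __maybe_unused const struct mlx5_flow_cmds *
+dr_example_get_flow_cmds(struct mlx5_core_dev *dev)
+{
+	if (mlx5_fs_dr_is_supported(dev))
+		return mlx5_fs_cmd_get_dr_cmds();
+
+	return mlx5_fs_cmd_get_fw_cmds();
+}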
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
new file mode 100644
index 000000000..d16862206
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2019 Mellanox Technologies
+ */
+
+#ifndef _MLX5_FS_DR_
+#define _MLX5_FS_DR_
+
+#include "mlx5dr.h"
+
+struct mlx5_flow_root_namespace;
+struct fs_fte;
+
+struct mlx5_fs_dr_action {
+ struct mlx5dr_action *dr_action;
+};
+
+struct mlx5_fs_dr_rule {
+ struct mlx5dr_rule *dr_rule;
+ /* Only actions created by fs_dr */
+ struct mlx5dr_action **dr_actions;
+ int num_actions;
+};
+
+struct mlx5_fs_dr_domain {
+ struct mlx5dr_domain *dr_domain;
+};
+
+struct mlx5_fs_dr_matcher {
+ struct mlx5dr_matcher *dr_matcher;
+};
+
+struct mlx5_fs_dr_table {
+ struct mlx5dr_table *dr_table;
+ struct mlx5dr_action *miss_action;
+};
+
+#ifdef CONFIG_MLX5_SW_STEERING
+
+bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void);
+
+#else
+
+static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
+{
+ return NULL;
+}
+
+static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
+{
+ return false;
+}
+
+#endif /* CONFIG_MLX5_SW_STEERING */
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
new file mode 100644
index 000000000..fb078fa0f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -0,0 +1,603 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef MLX5_IFC_DR_H
+#define MLX5_IFC_DR_H
+
+enum {
+ MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f,
+};
+
+struct mlx5_ifc_ste_general_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_5c[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 reserved_at_60[0xa0];
+ u8 tag_value[0x60];
+ u8 bit_mask[0x60];
+};
+
+struct mlx5_ifc_ste_sx_transmit_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_5c[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 sx_wire[0x1];
+ u8 sx_func_lb[0x1];
+ u8 sx_sniffer[0x1];
+ u8 sx_wire_enable[0x1];
+ u8 sx_func_lb_enable[0x1];
+ u8 sx_sniffer_enable[0x1];
+ u8 action_type[0x3];
+ u8 reserved_at_69[0x1];
+ u8 action_description[0x6];
+ u8 gvmi[0x10];
+
+ u8 encap_pointer_vlan_data[0x20];
+
+ u8 loopback_syndome_en[0x8];
+ u8 loopback_syndome[0x8];
+ u8 counter_trigger[0x10];
+
+ u8 miss_address_63_48[0x10];
+ u8 counter_trigger_23_16[0x8];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 learning_point[0x1];
+ u8 go_back[0x1];
+ u8 match_polarity[0x1];
+ u8 mask_mode[0x1];
+ u8 miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_rx_steering_mult_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_5c[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 member_count[0x10];
+ u8 gvmi[0x10];
+
+ u8 qp_list_pointer[0x20];
+
+ u8 reserved_at_a0[0x1];
+ u8 tunneling_action[0x3];
+ u8 action_description[0x4];
+ u8 reserved_at_a8[0x8];
+ u8 counter_trigger_15_0[0x10];
+
+ u8 miss_address_63_48[0x10];
+ u8 counter_trigger_23_16[0x08];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 learning_point[0x1];
+ u8 fail_on_error[0x1];
+ u8 match_polarity[0x1];
+ u8 mask_mode[0x1];
+ u8 miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_modify_packet_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_5c[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 number_of_re_write_actions[0x10];
+ u8 gvmi[0x10];
+
+ u8 header_re_write_actions_pointer[0x20];
+
+ u8 reserved_at_a0[0x1];
+ u8 tunneling_action[0x3];
+ u8 action_description[0x4];
+ u8 reserved_at_a8[0x8];
+ u8 counter_trigger_15_0[0x10];
+
+ u8 miss_address_63_48[0x10];
+ u8 counter_trigger_23_16[0x08];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 learning_point[0x1];
+ u8 fail_on_error[0x1];
+ u8 match_polarity[0x1];
+ u8 mask_mode[0x1];
+ u8 miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_bits {
+ u8 smac_47_16[0x20];
+
+ u8 smac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 qp_type[0x2];
+ u8 ethertype_filter[0x1];
+ u8 reserved_at_43[0x1];
+ u8 sx_sniffer[0x1];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 port[0x1];
+ u8 reserved_at_48[0x4];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_52[0x2];
+ u8 first_vlan_id[0xc];
+
+ u8 ip_fragmented[0x1];
+ u8 tcp_syn[0x1];
+ u8 encp_type[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 reserved_at_68[0x4];
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_dst_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 qp_type[0x2];
+ u8 ethertype_filter[0x1];
+ u8 reserved_at_43[0x1];
+ u8 sx_sniffer[0x1];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 port[0x1];
+ u8 reserved_at_48[0x4];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_52[0x2];
+ u8 first_vlan_id[0xc];
+
+ u8 ip_fragmented[0x1];
+ u8 tcp_syn[0x1];
+ u8 encp_type[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 reserved_at_68[0x4];
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_dst_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 smac_47_32[0x10];
+
+ u8 smac_31_0[0x20];
+
+ u8 sx_sniffer[0x1];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 port[0x1];
+ u8 l3_type[0x2];
+ u8 reserved_at_66[0x6];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits {
+ u8 destination_address[0x20];
+
+ u8 source_address[0x20];
+
+ u8 source_port[0x10];
+ u8 destination_port[0x10];
+
+ u8 fragmented[0x1];
+ u8 first_fragment[0x1];
+ u8 reserved_at_62[0x2];
+ u8 reserved_at_64[0x1];
+ u8 ecn[0x2];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 dscp[0x6];
+ u8 reserved_at_76[0x2];
+ u8 protocol[0x8];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits {
+ u8 dst_ip_127_96[0x20];
+
+ u8 dst_ip_95_64[0x20];
+
+ u8 dst_ip_63_32[0x20];
+
+ u8 dst_ip_31_0[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l2_tnl_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 l2_tunneling_network_id[0x20];
+
+ u8 ip_fragmented[0x1];
+ u8 tcp_syn[0x1];
+ u8 encp_type[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 reserved_at_6c[0x3];
+ u8 gre_key_flag[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv6_src_bits {
+ u8 src_ip_127_96[0x20];
+
+ u8 src_ip_95_64[0x20];
+
+ u8 src_ip_63_32[0x20];
+
+ u8 src_ip_31_0[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits {
+ u8 version[0x4];
+ u8 ihl[0x4];
+ u8 reserved_at_8[0x8];
+ u8 total_length[0x10];
+
+ u8 identification[0x10];
+ u8 flags[0x3];
+ u8 fragment_offset[0xd];
+
+ u8 time_to_live[0x8];
+ u8 reserved_at_48[0x8];
+ u8 checksum[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l4_bits {
+ u8 fragmented[0x1];
+ u8 first_fragment[0x1];
+ u8 reserved_at_2[0x6];
+ u8 protocol[0x8];
+ u8 dst_port[0x10];
+
+ u8 ipv6_version[0x4];
+ u8 reserved_at_24[0x1];
+ u8 ecn[0x2];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 src_port[0x10];
+
+ u8 ipv6_payload_length[0x10];
+ u8 ipv6_hop_limit[0x8];
+ u8 dscp[0x6];
+ u8 reserved_at_5e[0x2];
+
+ u8 tcp_data_offset[0x4];
+ u8 reserved_at_64[0x8];
+ u8 flow_label[0x14];
+};
+
+struct mlx5_ifc_ste_eth_l4_misc_bits {
+ u8 checksum[0x10];
+ u8 length[0x10];
+
+ u8 seq_num[0x20];
+
+ u8 ack_num[0x20];
+
+ u8 urgent_pointer[0x10];
+ u8 window_size[0x10];
+};
+
+struct mlx5_ifc_ste_mpls_bits {
+ u8 mpls0_label[0x14];
+ u8 mpls0_exp[0x3];
+ u8 mpls0_s_bos[0x1];
+ u8 mpls0_ttl[0x8];
+
+ u8 mpls1_label[0x20];
+
+ u8 mpls2_label[0x20];
+
+ u8 reserved_at_60[0x16];
+ u8 mpls4_s_bit[0x1];
+ u8 mpls4_qualifier[0x1];
+ u8 mpls3_s_bit[0x1];
+ u8 mpls3_qualifier[0x1];
+ u8 mpls2_s_bit[0x1];
+ u8 mpls2_qualifier[0x1];
+ u8 mpls1_s_bit[0x1];
+ u8 mpls1_qualifier[0x1];
+ u8 mpls0_s_bit[0x1];
+ u8 mpls0_qualifier[0x1];
+};
+
+struct mlx5_ifc_ste_register_0_bits {
+ u8 register_0_h[0x20];
+
+ u8 register_0_l[0x20];
+
+ u8 register_1_h[0x20];
+
+ u8 register_1_l[0x20];
+};
+
+struct mlx5_ifc_ste_register_1_bits {
+ u8 register_2_h[0x20];
+
+ u8 register_2_l[0x20];
+
+ u8 register_3_h[0x20];
+
+ u8 register_3_l[0x20];
+};
+
+struct mlx5_ifc_ste_gre_bits {
+ u8 gre_c_present[0x1];
+ u8 reserved_at_1[0x1];
+ u8 gre_k_present[0x1];
+ u8 gre_s_present[0x1];
+ u8 strict_src_route[0x1];
+ u8 recur[0x3];
+ u8 flags[0x5];
+ u8 version[0x3];
+ u8 gre_protocol[0x10];
+
+ u8 checksum[0x10];
+ u8 offset[0x10];
+
+ u8 gre_key_h[0x18];
+ u8 gre_key_l[0x8];
+
+ u8 seq_num[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_0_bits {
+ u8 flex_parser_3[0x20];
+
+ u8 flex_parser_2[0x20];
+
+ u8 flex_parser_1[0x20];
+
+ u8 flex_parser_0[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_1_bits {
+ u8 flex_parser_7[0x20];
+
+ u8 flex_parser_6[0x20];
+
+ u8 flex_parser_5[0x20];
+
+ u8 flex_parser_4[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_ok_bits {
+ u8 flex_parser_3[0x20];
+ u8 flex_parser_2[0x20];
+ u8 flex_parsers_ok[0x8];
+ u8 reserved_at_48[0x18];
+ u8 flex_parser_0[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_tnl_bits {
+ u8 flex_parser_tunneling_header_63_32[0x20];
+
+ u8 flex_parser_tunneling_header_31_0[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_flex_parser_tnl_vxlan_gpe_bits {
+ u8 outer_vxlan_gpe_flags[0x8];
+ u8 reserved_at_8[0x10];
+ u8 outer_vxlan_gpe_next_protocol[0x8];
+
+ u8 outer_vxlan_gpe_vni[0x18];
+ u8 reserved_at_38[0x8];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits {
+ u8 reserved_at_0[0x2];
+ u8 geneve_opt_len[0x6];
+ u8 geneve_oam[0x1];
+ u8 reserved_at_9[0x7];
+ u8 geneve_protocol_type[0x10];
+
+ u8 geneve_vni[0x18];
+ u8 reserved_at_38[0x8];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits {
+ u8 reserved_at_0[0x5];
+ u8 gtpu_msg_flags[0x3];
+ u8 gtpu_msg_type[0x8];
+ u8 reserved_at_10[0x10];
+
+ u8 gtpu_teid[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_tunnel_header_bits {
+ u8 tunnel_header_0[0x20];
+
+ u8 tunnel_header_1[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_general_purpose_bits {
+ u8 general_purpose_lookup_field[0x20];
+
+ u8 reserved_at_20[0x20];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_src_gvmi_qp_bits {
+ u8 loopback_syndrome[0x8];
+ u8 reserved_at_8[0x8];
+ u8 source_gvmi[0x10];
+
+ u8 reserved_at_20[0x5];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 source_is_requestor[0x1];
+ u8 source_qp[0x18];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_l2_hdr_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 smac_47_32[0x10];
+
+ u8 smac_31_0[0x20];
+
+ u8 ethertype[0x10];
+ u8 vlan_type[0x10];
+
+ u8 vlan[0x10];
+ u8 reserved_at_90[0x10];
+};
+
+/* Both HW set and HW add share the same HW format with different opcodes */
+struct mlx5_ifc_dr_action_hw_set_bits {
+ u8 opcode[0x8];
+ u8 destination_field_code[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_18[0x3];
+ u8 destination_length[0x5];
+
+ u8 inline_data[0x20];
+};
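+
+/* Illustrative encoding sketch (not part of this change): a single "set"
+ * re-write action on this layout could be built with MLX5_SET(), e.g.:
+ *
+ *	MLX5_SET(dr_action_hw_set, action, opcode, set_opcode);
+ *	MLX5_SET(dr_action_hw_set, action, destination_field_code, fld);
+ *	MLX5_SET(dr_action_hw_set, action, destination_left_shifter, shift);
+ *	MLX5_SET(dr_action_hw_set, action, destination_length, length);
+ *	MLX5_SET(dr_action_hw_set, action, inline_data, value);
+ *
+ * where set_opcode, fld, shift, length and value are placeholders for the
+ * opcode and field parameters chosen by the STE builder code.
+ */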
+
+struct mlx5_ifc_dr_action_hw_copy_bits {
+ u8 opcode[0x8];
+ u8 destination_field_code[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_18[0x2];
+ u8 destination_length[0x6];
+
+ u8 reserved_at_20[0x8];
+ u8 source_field_code[0x8];
+ u8 reserved_at_30[0x2];
+ u8 source_left_shifter[0x6];
+ u8 reserved_at_38[0x8];
+};
+
+enum {
+ MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ = 2,
+};
+
+struct mlx5_ifc_ste_aso_flow_meter_action_bits {
+ u8 reserved_at_0[0xc];
+ u8 action[0x1];
+ u8 initial_color[0x2];
+ u8 line_id[0x1];
+};
+
+struct mlx5_ifc_ste_double_action_aso_v1_bits {
+ u8 action_id[0x8];
+ u8 aso_context_number[0x18];
+
+ u8 dest_reg_id[0x2];
+ u8 change_ordering_tag[0x1];
+ u8 aso_check_ordering[0x1];
+ u8 aso_context_type[0x4];
+ u8 reserved_at_28[0x8];
+ union {
+ u8 aso_fields[0x10];
+ struct mlx5_ifc_ste_aso_flow_meter_action_bits flow_meter;
+ };
+};
+
+#endif /* MLX5_IFC_DR_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
new file mode 100644
index 000000000..34c2bd17a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
@@ -0,0 +1,434 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */
+
+#ifndef MLX5_IFC_DR_STE_V1_H
+#define MLX5_IFC_DR_STE_V1_H
+
+enum mlx5_ifc_ste_v1_modify_hdr_offset {
+ MLX5_MODIFY_HEADER_V1_QW_OFFSET = 0x20,
+};
+
+struct mlx5_ifc_ste_single_action_flow_tag_v1_bits {
+ u8 action_id[0x8];
+ u8 flow_tag[0x18];
+};
+
+struct mlx5_ifc_ste_single_action_modify_list_v1_bits {
+ u8 action_id[0x8];
+ u8 num_of_modify_actions[0x8];
+ u8 modify_actions_ptr[0x10];
+};
+
+struct mlx5_ifc_ste_single_action_remove_header_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 reserved_at_10[0x2];
+ u8 end_anchor[0x6];
+ u8 reserved_at_18[0x4];
+ u8 decap[0x1];
+ u8 vni_to_cqe[0x1];
+ u8 qos_profile[0x2];
+};
+
+struct mlx5_ifc_ste_single_action_remove_header_size_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 outer_l4_remove[0x1];
+ u8 reserved_at_11[0x1];
+ u8 start_offset[0x7];
+ u8 reserved_at_18[0x1];
+ u8 remove_size[0x6];
+};
+
+struct mlx5_ifc_ste_double_action_copy_v1_bits {
+ u8 action_id[0x8];
+ u8 destination_dw_offset[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_17[0x2];
+ u8 destination_length[0x6];
+
+ u8 reserved_at_20[0x8];
+ u8 source_dw_offset[0x8];
+ u8 reserved_at_30[0x2];
+ u8 source_right_shifter[0x6];
+ u8 reserved_at_38[0x8];
+};
+
+struct mlx5_ifc_ste_double_action_set_v1_bits {
+ u8 action_id[0x8];
+ u8 destination_dw_offset[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_18[0x2];
+ u8 destination_length[0x6];
+
+ u8 inline_data[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_add_v1_bits {
+ u8 action_id[0x8];
+ u8 destination_dw_offset[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_18[0x2];
+ u8 destination_length[0x6];
+
+ u8 add_value[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_insert_with_inline_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 start_offset[0x7];
+ u8 reserved_at_17[0x9];
+
+ u8 inline_data[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits {
+ u8 action_id[0x8];
+ u8 reserved_at_8[0x2];
+ u8 start_anchor[0x6];
+ u8 start_offset[0x7];
+ u8 size[0x6];
+ u8 attributes[0x3];
+
+ u8 pointer[0x20];
+};
+
+struct mlx5_ifc_ste_double_action_modify_action_list_v1_bits {
+ u8 action_id[0x8];
+ u8 modify_actions_pattern_pointer[0x18];
+
+ u8 number_of_modify_actions[0x8];
+ u8 modify_actions_argument_pointer[0x18];
+};
+
+struct mlx5_ifc_ste_match_bwc_v1_bits {
+ u8 entry_format[0x8];
+ u8 counter_id[0x18];
+
+ u8 miss_address_63_48[0x10];
+ u8 match_definer_ctx_idx[0x8];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 reserved_at_5a[0x1];
+ u8 match_polarity[0x1];
+ u8 reparse[0x1];
+ u8 reserved_at_5d[0x3];
+
+ u8 next_table_base_63_48[0x10];
+ u8 hash_definer_ctx_idx[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 hash_type[0x2];
+ u8 hash_after_actions[0x1];
+ u8 reserved_at_9e[0x2];
+
+ u8 byte_mask[0x10];
+ u8 next_entry_format[0x1];
+ u8 mask_mode[0x1];
+ u8 gvmi[0xe];
+
+ u8 action[0x40];
+};
+
+struct mlx5_ifc_ste_mask_and_match_v1_bits {
+ u8 entry_format[0x8];
+ u8 counter_id[0x18];
+
+ u8 miss_address_63_48[0x10];
+ u8 match_definer_ctx_idx[0x8];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 reserved_at_5a[0x1];
+ u8 match_polarity[0x1];
+ u8 reparse[0x1];
+ u8 reserved_at_5d[0x3];
+
+ u8 next_table_base_63_48[0x10];
+ u8 hash_definer_ctx_idx[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 hash_type[0x2];
+ u8 hash_after_actions[0x1];
+ u8 reserved_at_9e[0x2];
+
+ u8 action[0x60];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_v1_bits {
+ u8 reserved_at_0[0x1];
+ u8 sx_sniffer[0x1];
+ u8 functional_loopback[0x1];
+ u8 ip_fragmented[0x1];
+ u8 qp_type[0x2];
+ u8 encapsulation_type[0x2];
+ u8 port[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_vlan_qualifier[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+
+ u8 smac_47_16[0x20];
+
+ u8 smac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 reserved_at_60[0x6];
+ u8 tcp_syn[0x1];
+ u8 reserved_at_67[0x3];
+ u8 force_loopback[0x1];
+ u8 l2_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 l4_ok[0x1];
+ u8 second_vlan_qualifier[0x2];
+
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_dst_v1_bits {
+ u8 reserved_at_0[0x1];
+ u8 sx_sniffer[0x1];
+ u8 functional_lb[0x1];
+ u8 ip_fragmented[0x1];
+ u8 qp_type[0x2];
+ u8 encapsulation_type[0x2];
+ u8 port[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_vlan_qualifier[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 reserved_at_60[0x6];
+ u8 tcp_syn[0x1];
+ u8 reserved_at_67[0x3];
+ u8 force_lb[0x1];
+ u8 l2_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 l4_ok[0x1];
+ u8 second_vlan_qualifier[0x2];
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_dst_v1_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 smac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 reserved_at_50[0x2];
+ u8 functional_lb[0x1];
+ u8 reserved_at_53[0x5];
+ u8 port[0x2];
+ u8 l3_type[0x2];
+ u8 reserved_at_5c[0x2];
+ u8 first_vlan_qualifier[0x2];
+
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+ u8 smac_15_0[0x10];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_v1_bits {
+ u8 source_address[0x20];
+
+ u8 destination_address[0x20];
+
+ u8 source_port[0x10];
+ u8 destination_port[0x10];
+
+ u8 reserved_at_60[0x4];
+ u8 l4_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 fragmented[0x1];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 dscp[0x6];
+ u8 ecn[0x2];
+ u8 protocol[0x8];
+};
+
+struct mlx5_ifc_ste_eth_l2_tnl_v1_bits {
+ u8 l2_tunneling_network_id[0x20];
+
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 reserved_at_60[0x3];
+ u8 ip_fragmented[0x1];
+ u8 reserved_at_64[0x2];
+ u8 encp_type[0x2];
+ u8 reserved_at_68[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_vlan_qualifier[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_misc_v1_bits {
+ u8 identification[0x10];
+ u8 flags[0x3];
+ u8 fragment_offset[0xd];
+
+ u8 total_length[0x10];
+ u8 checksum[0x10];
+
+ u8 version[0x4];
+ u8 ihl[0x4];
+ u8 time_to_live[0x8];
+ u8 reserved_at_50[0x10];
+
+ u8 reserved_at_60[0x1c];
+ u8 voq_internal_prio[0x4];
+};
+
+struct mlx5_ifc_ste_eth_l4_v1_bits {
+ u8 ipv6_version[0x4];
+ u8 reserved_at_4[0x4];
+ u8 dscp[0x6];
+ u8 ecn[0x2];
+ u8 ipv6_hop_limit[0x8];
+ u8 protocol[0x8];
+
+ u8 src_port[0x10];
+ u8 dst_port[0x10];
+
+ u8 first_fragment[0x1];
+ u8 reserved_at_41[0xb];
+ u8 flow_label[0x14];
+
+ u8 tcp_data_offset[0x4];
+ u8 l4_ok[0x1];
+ u8 l3_ok[0x1];
+ u8 fragmented[0x1];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 ipv6_paylen[0x10];
+};
+
+struct mlx5_ifc_ste_eth_l4_misc_v1_bits {
+ u8 window_size[0x10];
+ u8 urgent_pointer[0x10];
+
+ u8 ack_num[0x20];
+
+ u8 seq_num[0x20];
+
+ u8 length[0x10];
+ u8 checksum[0x10];
+};
+
+struct mlx5_ifc_ste_mpls_v1_bits {
+ u8 reserved_at_0[0x15];
+ u8 mpls_ok[0x1];
+ u8 mpls4_s_bit[0x1];
+ u8 mpls4_qualifier[0x1];
+ u8 mpls3_s_bit[0x1];
+ u8 mpls3_qualifier[0x1];
+ u8 mpls2_s_bit[0x1];
+ u8 mpls2_qualifier[0x1];
+ u8 mpls1_s_bit[0x1];
+ u8 mpls1_qualifier[0x1];
+ u8 mpls0_s_bit[0x1];
+ u8 mpls0_qualifier[0x1];
+
+ u8 mpls0_label[0x14];
+ u8 mpls0_exp[0x3];
+ u8 mpls0_s_bos[0x1];
+ u8 mpls0_ttl[0x8];
+
+ u8 mpls1_label[0x20];
+
+ u8 mpls2_label[0x20];
+};
+
+struct mlx5_ifc_ste_gre_v1_bits {
+ u8 gre_c_present[0x1];
+ u8 reserved_at_1[0x1];
+ u8 gre_k_present[0x1];
+ u8 gre_s_present[0x1];
+ u8 strict_src_route[0x1];
+ u8 recur[0x3];
+ u8 flags[0x5];
+ u8 version[0x3];
+ u8 gre_protocol[0x10];
+
+ u8 reserved_at_20[0x20];
+
+ u8 gre_key_h[0x18];
+ u8 gre_key_l[0x8];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_src_gvmi_qp_v1_bits {
+ u8 loopback_synd[0x8];
+ u8 reserved_at_8[0x7];
+ u8 functional_lb[0x1];
+ u8 source_gvmi[0x10];
+
+ u8 force_lb[0x1];
+ u8 reserved_at_21[0x1];
+ u8 source_is_requestor[0x1];
+ u8 reserved_at_23[0x5];
+ u8 source_qp[0x18];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_icmp_v1_bits {
+ u8 icmp_payload_data[0x20];
+
+ u8 icmp_header_data[0x20];
+
+ u8 icmp_type[0x8];
+ u8 icmp_code[0x8];
+ u8 reserved_at_50[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+#endif /* MLX5_IFC_DR_STE_V1_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
new file mode 100644
index 000000000..226a0d7bb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef _MLX5DR_H_
+#define _MLX5DR_H_
+
+struct mlx5dr_domain;
+struct mlx5dr_table;
+struct mlx5dr_matcher;
+struct mlx5dr_rule;
+struct mlx5dr_action;
+
+enum mlx5dr_domain_type {
+ MLX5DR_DOMAIN_TYPE_NIC_RX,
+ MLX5DR_DOMAIN_TYPE_NIC_TX,
+ MLX5DR_DOMAIN_TYPE_FDB,
+};
+
+enum mlx5dr_domain_sync_flags {
+ MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0,
+ MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1,
+};
+
+enum mlx5dr_action_reformat_type {
+ DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2,
+ DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2,
+ DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2,
+ DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3,
+ DR_ACTION_REFORMAT_TYP_INSERT_HDR,
+ DR_ACTION_REFORMAT_TYP_REMOVE_HDR,
+};
+
+struct mlx5dr_match_parameters {
+ size_t match_sz;
+ u64 *match_buf; /* Device spec format */
+};
+
+struct mlx5dr_action_dest {
+ struct mlx5dr_action *dest;
+ struct mlx5dr_action *reformat;
+};
+
+struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type);
+
+int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
+
+int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
+
+void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain *peer_dmn);
+
+struct mlx5dr_table *
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,
+ u16 uid);
+
+struct mlx5dr_table *
+mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft);
+
+int mlx5dr_table_destroy(struct mlx5dr_table *table);
+
+u32 mlx5dr_table_get_id(struct mlx5dr_table *table);
+
+struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *table,
+ u32 priority,
+ u8 match_criteria_enable,
+ struct mlx5dr_match_parameters *mask);
+
+int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher);
+
+struct mlx5dr_rule *
+mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ size_t num_actions,
+ struct mlx5dr_action *actions[],
+ u32 flow_source);
+
+int mlx5dr_rule_destroy(struct mlx5dr_rule *rule);
+
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_action *action);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *table);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain,
+ struct mlx5_flow_table *ft);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
+ u16 vport, u8 vhca_id_valid,
+ u16 vhca_id);
+
+struct mlx5dr_action *
+mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action_dest *dests,
+ u32 num_of_dests,
+ bool ignore_flow_level,
+ u32 flow_source);
+
+struct mlx5dr_action *mlx5dr_action_create_drop(void);
+
+struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value);
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id);
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id);
+
+struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+ enum mlx5dr_action_reformat_type reformat_type,
+ u8 reformat_param_0,
+ u8 reformat_param_1,
+ size_t data_sz,
+ void *data);
+
+struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain,
+ u32 flags,
+ size_t actions_sz,
+ __be64 actions[]);
+
+struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void);
+
+struct mlx5dr_action *
+mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr);
+
+struct mlx5dr_action *
+mlx5dr_action_create_aso(struct mlx5dr_domain *dmn,
+ u32 obj_id,
+ u8 return_reg_id,
+ u8 aso_type,
+ u8 init_color,
+ u8 meter_id);
+
+int mlx5dr_action_destroy(struct mlx5dr_action *action);
+
+static inline bool
+mlx5dr_is_supported(struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, roce) &&
+ (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
+ (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
+ (MLX5_CAP_GEN(dev, steering_format_version) <=
+ MLX5_STEERING_FORMAT_CONNECTX_7)));
+}
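+
+/* Illustrative call sequence (not part of this change): a consumer of this
+ * API builds objects top-down and tears them down in reverse order, e.g.:
+ *
+ *	dmn = mlx5dr_domain_create(mdev, MLX5DR_DOMAIN_TYPE_FDB);
+ *	tbl = mlx5dr_table_create(dmn, level, flags, 0);
+ *	matcher = mlx5dr_matcher_create(tbl, prio, criteria, &mask);
+ *	actions[0] = mlx5dr_action_create_drop();
+ *	rule = mlx5dr_rule_create(matcher, &value, 1, actions, flow_source);
+ *	...
+ *	mlx5dr_rule_destroy(rule);
+ *	mlx5dr_action_destroy(actions[0]);
+ *	mlx5dr_matcher_destroy(matcher);
+ *	mlx5dr_table_destroy(tbl);
+ *	mlx5dr_domain_destroy(dmn);
+ *
+ * mdev, level, flags, prio, criteria, mask, value and flow_source are
+ * placeholders; error handling is omitted for brevity.
+ */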
+
+/* buddy functions & structure */
+
+struct mlx5dr_icm_mr;
+
+struct mlx5dr_icm_buddy_mem {
+ unsigned long **bitmap;
+ unsigned int *num_free;
+ u32 max_order;
+ struct list_head list_node;
+ struct mlx5dr_icm_mr *icm_mr;
+ struct mlx5dr_icm_pool *pool;
+
+ /* This is the list of used chunks. HW may be accessing this memory */
+ struct list_head used_list;
+ u64 used_memory;
+
+ /* Chunks on this list may still be accessed by hardware for an
+ * undetermined amount of time; the sync_ste command frees them
+ * for reuse.
+ */
+ struct list_head hot_list;
+
+ /* Memory optimisation */
+ struct mlx5dr_ste *ste_arr;
+ struct list_head *miss_list;
+ u8 *hw_ste_arr;
+};
+
+int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int max_order);
+void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy);
+int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int order,
+ unsigned int *segment);
+void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy,
+ unsigned int seg, unsigned int order);
+
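+/* Illustrative usage (not part of this change), assuming the usual
+ * 0-on-success convention for the functions above:
+ *
+ *	unsigned int seg;
+ *
+ *	if (!mlx5dr_buddy_init(buddy, max_order) &&
+ *	    !mlx5dr_buddy_alloc_mem(buddy, order, &seg)) {
+ *		use segment "seg" of (1 << order) entries, then:
+ *		mlx5dr_buddy_free_mem(buddy, seg, order);
+ *	}
+ *	mlx5dr_buddy_cleanup(buddy);
+ *
+ * buddy, max_order and order are placeholders provided by the ICM pool.
+ */
+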
+#endif /* _MLX5DR_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
new file mode 100644
index 000000000..b6931bbe5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include <linux/mlx5/transobj.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+
+ err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out);
+ if (!err)
+ *tdn = MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
+
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {};
+
+ MLX5_SET(dealloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+ mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
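+
+/* Illustrative usage (not part of this change): a transport domain is
+ * typically allocated once and then referenced by the TIRs/TISs created
+ * under it:
+ *
+ *	u32 tdn;
+ *	int err = mlx5_core_alloc_transport_domain(mdev, &tdn);
+ *
+ *	if (err)
+ *		return err;
+ *	... create TIRs/TISs that reference tdn ...
+ *	mlx5_core_dealloc_transport_domain(mdev, tdn);
+ *
+ * mdev is a placeholder for the caller's mlx5_core_dev.
+ */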
+
+int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {};
+ int err;
+
+ MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rqn = MLX5_GET(create_rq_out, out, rqn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq);
+
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in)
+{
+ MLX5_SET(modify_rq_in, in, rqn, rqn);
+ MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ);
+
+ return mlx5_cmd_exec_in(dev, modify_rq, in);
+}
+EXPORT_SYMBOL(mlx5_core_modify_rq);
+
+void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ mlx5_cmd_exec_in(dev, destroy_rq, in);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq);
+
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {};
+
+ MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+ MLX5_SET(query_rq_in, in, rqn, rqn);
+
+ return mlx5_cmd_exec_inout(dev, query_rq, in, out);
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
+int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {};
+ int err;
+
+ MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *sqn = MLX5_GET(create_sq_out, out, sqn);
+
+ return err;
+}
+
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in)
+{
+ MLX5_SET(modify_sq_in, in, sqn, sqn);
+ MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
+ return mlx5_cmd_exec_in(dev, modify_sq, in);
+}
+EXPORT_SYMBOL(mlx5_core_modify_sq);
+
+void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
+
+ MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, in, sqn, sqn);
+ mlx5_cmd_exec_in(dev, destroy_sq, in);
+}
+
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {};
+
+ MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+ MLX5_SET(query_sq_in, in, sqn, sqn);
+ return mlx5_cmd_exec_inout(dev, query_sq, in, out);
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
+int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = kvzalloc(inlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev, sqn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *state = MLX5_GET(sqc, sqc, state);
+
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
+
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
+ int err;
+
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+ err = mlx5_cmd_exec_inout(dev, create_tir, in, out);
+ if (!err)
+ *tirn = MLX5_GET(create_tir_out, out, tirn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_tir);
+
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in)
+{
+ MLX5_SET(modify_tir_in, in, tirn, tirn);
+ MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR);
+ return mlx5_cmd_exec_in(dev, modify_tir, in);
+}
+
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
+
+ MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, in, tirn, tirn);
+ mlx5_cmd_exec_in(dev, destroy_tir, in);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
+
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {};
+ int err;
+
+ MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
+ err = mlx5_cmd_exec_inout(dev, create_tis, in, out);
+ if (!err)
+ *tisn = MLX5_GET(create_tis_out, out, tisn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in)
+{
+ MLX5_SET(modify_tis_in, in, tisn, tisn);
+ MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+
+ return mlx5_cmd_exec_in(dev, modify_tis, in);
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
+
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+ mlx5_cmd_exec_in(dev, destroy_tis, in);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
+
+int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rqtn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
+ int err;
+
+ MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rqtn = MLX5_GET(create_rqt_out, out, rqtn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rqt);
+
+int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {};
+
+ MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
+ MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+ mlx5_cmd_exec_in(dev, destroy_rqt, in);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rqt);
+
+static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev,
+ struct mlx5_hairpin_params *params, u32 *rqn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_rq_in)] = {0};
+ void *rqc, *wq;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(rqc, rqc, hairpin, 1);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, counter_set_id, params->q_counter);
+
+ MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size);
+ MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets);
+
+ return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn);
+}
+
+static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev,
+ struct mlx5_hairpin_params *params, u32 *sqn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_sq_in)] = {0};
+ void *sqc, *wq;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(sqc, sqc, hairpin, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+
+ MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size);
+ MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets);
+
+ return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn);
+}
+
+static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp,
+ struct mlx5_hairpin_params *params)
+{
+ int i, j, err;
+
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]);
+ if (err)
+ goto out_err_rq;
+ }
+
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]);
+ if (err)
+ goto out_err_sq;
+ }
+
+ return 0;
+
+out_err_sq:
+ for (j = 0; j < i; j++)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]);
+ i = hp->num_channels;
+out_err_rq:
+ for (j = 0; j < i; j++)
+ mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]);
+ return err;
+}
+
+static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ for (i = 0; i < hp->num_channels; i++) {
+ mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]);
+ if (!hp->peer_gone)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+ }
+}
+
+static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn,
+ int curr_state, int next_state,
+ u16 peer_vhca, u32 peer_sq)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {};
+ void *rqc;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ if (next_state == MLX5_RQC_STATE_RDY) {
+ MLX5_SET(rqc, rqc, hairpin_peer_sq, peer_sq);
+ MLX5_SET(rqc, rqc, hairpin_peer_vhca, peer_vhca);
+ }
+
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ return mlx5_core_modify_rq(func_mdev, rqn, in);
+}
+
+static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn,
+ int curr_state, int next_state,
+ u16 peer_vhca, u32 peer_rq)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_sq_in)] = {0};
+ void *sqc;
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ if (next_state == MLX5_SQC_STATE_RDY) {
+ MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq);
+ MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca);
+ }
+
+ MLX5_SET(modify_sq_in, in, sq_state, curr_state);
+ MLX5_SET(sqc, sqc, state, next_state);
+
+ return mlx5_core_modify_sq(peer_mdev, sqn, in);
+}
+
+static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp)
+{
+ int i, j, err;
+
+ /* set peer SQs */
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i],
+ MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY,
+ MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]);
+ if (err)
+ goto err_modify_sq;
+ }
+
+ /* set func RQs */
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i],
+ MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY,
+ MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]);
+ if (err)
+ goto err_modify_rq;
+ }
+
+ return 0;
+
+err_modify_rq:
+ for (j = 0; j < i; j++)
+ mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY,
+ MLX5_RQC_STATE_RST, 0, 0);
+ i = hp->num_channels;
+err_modify_sq:
+ for (j = 0; j < i; j++)
+ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_RST, 0, 0);
+ return err;
+}
+
+static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_RST, 0, 0);
+}
+
+static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ /* unset func RQs */
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
+ MLX5_RQC_STATE_RST, 0, 0);
+ /* unset peer SQs */
+ if (!hp->peer_gone)
+ mlx5_hairpin_unpair_peer_sq(hp);
+}
+
+struct mlx5_hairpin *
+mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
+ struct mlx5_core_dev *peer_mdev,
+ struct mlx5_hairpin_params *params)
+{
+ struct mlx5_hairpin *hp;
+ int size, err;
+
+ size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32);
+ hp = kzalloc(size, GFP_KERNEL);
+ if (!hp)
+ return ERR_PTR(-ENOMEM);
+
+ hp->func_mdev = func_mdev;
+ hp->peer_mdev = peer_mdev;
+ hp->num_channels = params->num_channels;
+
+ hp->rqn = (void *)hp + sizeof(*hp);
+ hp->sqn = hp->rqn + params->num_channels;
+
+ /* alloc and pair func --> peer hairpin */
+ err = mlx5_hairpin_create_queues(hp, params);
+ if (err)
+ goto err_create_queues;
+
+ err = mlx5_hairpin_pair_queues(hp);
+ if (err)
+ goto err_pair_queues;
+
+ return hp;
+
+err_pair_queues:
+ mlx5_hairpin_destroy_queues(hp);
+err_create_queues:
+ kfree(hp);
+ return ERR_PTR(err);
+}
+
+void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
+{
+ mlx5_hairpin_unpair_queues(hp);
+ mlx5_hairpin_destroy_queues(hp);
+ kfree(hp);
+}
+
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ mlx5_hairpin_unpair_peer_sq(hp);
+
+ /* destroy peer SQs */
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+
+ hp->peer_gone = true;
+}
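+
+/* Illustrative usage (not part of this change): a hairpin between two
+ * devices is created and torn down as a unit; the params fields named
+ * below follow the fields this file reads:
+ *
+ *	struct mlx5_hairpin_params params = {
+ *		.log_data_size = log_data_sz,
+ *		.log_num_packets = log_num_pkts,
+ *		.q_counter = counter_id,
+ *		.num_channels = n,
+ *	};
+ *	struct mlx5_hairpin *hp;
+ *
+ *	hp = mlx5_core_hairpin_create(func_mdev, peer_mdev, &params);
+ *	if (IS_ERR(hp))
+ *		return PTR_ERR(hp);
+ *	...
+ *	mlx5_core_hairpin_destroy(hp);
+ *
+ * log_data_sz, log_num_pkts, counter_id and n are placeholders.
+ */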
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
new file mode 100644
index 000000000..8455e79bc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+static int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+ err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
+ if (err)
+ return err;
+
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return 0;
+}
+
+static int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
+
+ MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+ MLX5_SET(dealloc_uar_in, in, uar, uarn);
+ return mlx5_cmd_exec_in(dev, dealloc_uar, in);
+}
+
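+/* When the device exposes 4KB UARs (uar_4k capability), several hardware
+ * UARs share a single kernel system page; the helpers below account for
+ * that when computing how many UARs fit in a page and which PFN a given
+ * UAR index maps to.
+ */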
+static int uars_per_sys_page(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, uar_4k))
+ return MLX5_CAP_GEN(mdev, num_of_uars_per_page);
+
+ return 1;
+}
+
+static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
+{
+ u32 system_page_index;
+
+ if (MLX5_CAP_GEN(mdev, uar_4k))
+ system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT);
+ else
+ system_page_index = index;
+
+ return (mdev->bar_addr >> PAGE_SHIFT) + system_page_index;
+}
+
+static void up_rel_func(struct kref *kref)
+{
+ struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
+
+ list_del(&up->list);
+ iounmap(up->map);
+ if (mlx5_cmd_free_uar(up->mdev, up->index))
+ mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
+ bitmap_free(up->reg_bitmap);
+ bitmap_free(up->fp_bitmap);
+ kfree(up);
+}
+
+static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
+ bool map_wc)
+{
+ struct mlx5_uars_page *up;
+ int err = -ENOMEM;
+ phys_addr_t pfn;
+ int bfregs;
+ int node;
+ int i;
+
+ bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
+ node = mdev->priv.numa_node;
+ up = kzalloc_node(sizeof(*up), GFP_KERNEL, node);
+ if (!up)
+ return ERR_PTR(err);
+
+ up->mdev = mdev;
+ up->reg_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node);
+ if (!up->reg_bitmap)
+ goto error1;
+
+ up->fp_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node);
+ if (!up->fp_bitmap)
+ goto error1;
+
+ for (i = 0; i < bfregs; i++)
+ if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR)
+ set_bit(i, up->reg_bitmap);
+ else
+ set_bit(i, up->fp_bitmap);
+
+ up->bfregs = bfregs;
+ up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+ up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+
+ err = mlx5_cmd_alloc_uar(mdev, &up->index);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
+ goto error1;
+ }
+
+ pfn = uar2pfn(mdev, up->index);
+ if (map_wc) {
+ up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!up->map) {
+ err = -EAGAIN;
+ goto error2;
+ }
+ } else {
+ up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!up->map) {
+ err = -ENOMEM;
+ goto error2;
+ }
+ }
+ kref_init(&up->ref_count);
+ mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
+ up->index, up->bfregs);
+ return up;
+
+error2:
+ if (mlx5_cmd_free_uar(mdev, up->index))
+ mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
+error1:
+ bitmap_free(up->fp_bitmap);
+ bitmap_free(up->reg_bitmap);
+ kfree(up);
+ return ERR_PTR(err);
+}
+
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_uars_page *ret;
+
+ mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+ if (!list_empty(&mdev->priv.bfregs.reg_head.list)) {
+ ret = list_first_entry(&mdev->priv.bfregs.reg_head.list,
+ struct mlx5_uars_page, list);
+ kref_get(&ret->ref_count);
+ goto out;
+ }
+ ret = alloc_uars_page(mdev, false);
+ if (IS_ERR(ret))
+ goto out;
+ list_add(&ret->list, &mdev->priv.bfregs.reg_head.list);
+out:
+ mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_get_uars_page);
+
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up)
+{
+ mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+ kref_put(&up->ref_count, up_rel_func);
+ mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+}
+EXPORT_SYMBOL(mlx5_put_uars_page);
+
+static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
+{
+ /* return the offset in bytes from the start of the page to the
+ * blue flame area of the UAR
+ */
+ return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE +
+ (dbi % MLX5_BFREGS_PER_UAR) *
+ (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET;
+}
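+
+/* Worked example (illustrative, assuming MLX5_BFREGS_PER_UAR == 4): for
+ * dbi == 5 the offset is 1 * MLX5_ADAPTER_PAGE_SIZE (second 4KB UAR in the
+ * system page) + 1 * bf_reg_size (second blue flame slot) + MLX5_BF_OFFSET.
+ */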
+
+static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+ bool map_wc, bool fast_path)
+{
+ struct mlx5_bfreg_data *bfregs;
+ struct mlx5_uars_page *up;
+ struct list_head *head;
+ unsigned long *bitmap;
+ unsigned int *avail;
+ struct mutex *lock; /* protects the selected bfreg list */
+ int dbi;
+
+ bfregs = &mdev->priv.bfregs;
+ if (map_wc) {
+ head = &bfregs->wc_head.list;
+ lock = &bfregs->wc_head.lock;
+ } else {
+ head = &bfregs->reg_head.list;
+ lock = &bfregs->reg_head.lock;
+ }
+ mutex_lock(lock);
+ if (list_empty(head)) {
+ up = alloc_uars_page(mdev, map_wc);
+ if (IS_ERR(up)) {
+ mutex_unlock(lock);
+ return PTR_ERR(up);
+ }
+ list_add(&up->list, head);
+ } else {
+ up = list_entry(head->next, struct mlx5_uars_page, list);
+ kref_get(&up->ref_count);
+ }
+ if (fast_path) {
+ bitmap = up->fp_bitmap;
+ avail = &up->fp_avail;
+ } else {
+ bitmap = up->reg_bitmap;
+ avail = &up->reg_avail;
+ }
+ dbi = find_first_bit(bitmap, up->bfregs);
+ clear_bit(dbi, bitmap);
+ (*avail)--;
+ if (!(*avail))
+ list_del(&up->list);
+
+ bfreg->map = up->map + map_offset(mdev, dbi);
+ bfreg->up = up;
+ bfreg->wc = map_wc;
+ bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR;
+ mutex_unlock(lock);
+
+ return 0;
+}
+
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+ bool map_wc, bool fast_path)
+{
+ int err;
+
+ err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);
+ if (!err)
+ return 0;
+
+ if (err == -EAGAIN && map_wc)
+ return alloc_bfreg(mdev, bfreg, false, fast_path);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_alloc_bfreg);
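+
+/* Illustrative usage (not part of this change): a send queue typically
+ * grabs a blue flame register for doorbells and releases it on teardown:
+ *
+ *	struct mlx5_sq_bfreg bfreg;
+ *	int err;
+ *
+ *	err = mlx5_alloc_bfreg(mdev, &bfreg, true, false);
+ *	if (err)
+ *		return err;
+ *	... ring doorbells through bfreg.map ...
+ *	mlx5_free_bfreg(mdev, &bfreg);
+ *
+ * mdev is a placeholder for the caller's mlx5_core_dev.
+ */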
+
+static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
+ struct mlx5_uars_page *up,
+ struct mlx5_sq_bfreg *bfreg)
+{
+ unsigned int uar_idx;
+ unsigned int bfreg_idx;
+ unsigned int bf_reg_size;
+
+ bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
+
+ uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
+ bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size;
+
+ return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx;
+}
+
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
+{
+ struct mlx5_bfreg_data *bfregs;
+ struct mlx5_uars_page *up;
+ struct mutex *lock; /* protects the selected bfreg list */
+ unsigned int dbi;
+ bool fp;
+ unsigned int *avail;
+ unsigned long *bitmap;
+ struct list_head *head;
+
+ bfregs = &mdev->priv.bfregs;
+ if (bfreg->wc) {
+ head = &bfregs->wc_head.list;
+ lock = &bfregs->wc_head.lock;
+ } else {
+ head = &bfregs->reg_head.list;
+ lock = &bfregs->reg_head.lock;
+ }
+ up = bfreg->up;
+ dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
+ fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
+ if (fp) {
+ avail = &up->fp_avail;
+ bitmap = up->fp_bitmap;
+ } else {
+ avail = &up->reg_avail;
+ bitmap = up->reg_bitmap;
+ }
+ mutex_lock(lock);
+ (*avail)++;
+ set_bit(dbi, bitmap);
+ if (*avail == 1)
+ list_add_tail(&up->list, head);
+
+ kref_put(&up->ref_count, up_rel_func);
+ mutex_unlock(lock);
+}
+EXPORT_SYMBOL(mlx5_free_bfreg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
new file mode 100644
index 000000000..3f68e3198
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -0,0 +1,1173 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/export.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_core.h"
+#include "sf/sf.h"
+
+/* Mutex to hold while enabling or disabling RoCE */
+static DEFINE_MUTEX(mlx5_roce_en_lock);
+
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+ int err;
+
+ MLX5_SET(query_vport_state_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_STATE);
+ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+ if (err)
+ return 0;
+
+ return MLX5_GET(query_vport_state_out, out, state);
+}
+
+int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u8 other_vport, u8 state)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {};
+
+ MLX5_SET(modify_vport_state_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_VPORT_STATE);
+ MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(modify_vport_state_in, in, vport_number, vport);
+ MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
+ MLX5_SET(modify_vport_state_in, in, admin_state, state);
+
+ return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
+}
+
+static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
+ u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ return mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out);
+}
+
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *min_inline)
+{
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
+ int err;
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out);
+ if (!err)
+ *min_inline = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.min_wqe_inline_mode);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
+
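+/* Resolve the minimum WQE inline mode for the local vport: when the device
+ * reports it per NIC vport context, query it there; on query failure, or
+ * when the capability mandates L2 inlining, fall back to L2; otherwise no
+ * inlining is required.
+ */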
+void mlx5_query_min_inline(struct mlx5_core_dev *mdev,
+ u8 *min_inline_mode)
+{
+ switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode))
+ break;
+ fallthrough;
+ case MLX5_CAP_INLINE_MODE_L2:
+ *min_inline_mode = MLX5_INLINE_MODE_L2;
+ break;
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ *min_inline_mode = MLX5_INLINE_MODE_NONE;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(mlx5_query_min_inline);
+
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+ u16 vport, u8 min_inline)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {};
+ void *nic_vport_ctx;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.min_inline, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ min_wqe_inline_mode, min_inline);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ return mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
+}
+
+int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, bool other, u8 *addr)
+{
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
+ u8 *out_addr;
+ int err;
+
+ out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context.permanent_address);
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, other);
+
+ err = mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out);
+ if (!err)
+ ether_addr_copy(addr, &out_addr[2]);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
+
+int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr)
+{
+ return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_mac_address);
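+
+/* Illustrative usage (not part of this change): a netdev driver usually
+ * reads its own permanent MAC address at probe time:
+ *
+ *	u8 mac[ETH_ALEN];
+ *
+ *	if (!mlx5_query_mac_address(mdev, mac))
+ *		eth_hw_addr_set(netdev, mac);
+ *
+ * mdev and netdev are placeholders for the caller's device objects.
+ */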
+
+int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, const u8 *addr)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+ void *nic_vport_ctx;
+ u8 *perm_mac;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.permanent_address, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx,
+ permanent_address);
+
+ ether_addr_copy(&perm_mac[2], addr);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
+
+int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
+ if (!err)
+ *mtu = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.mtu);
+
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu);
+
+int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu);
+
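+/* Query the allowed UC/MC address list of a vport. *list_size is in/out:
+ * it is clamped to the device maximum on input and set to the number of
+ * entries actually returned on output.
+ */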
+int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int *list_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+ void *nic_vport_ctx;
+ int max_list_size;
+ int req_list_size;
+ int out_sz;
+ void *out;
+ int err;
+ int i;
+
+ req_list_size = *list_size;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (req_list_size > max_list_size) {
+ mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n",
+ req_list_size, max_list_size);
+ req_list_size = max_list_size;
+ }
+
+ out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_out) +
+ req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+ out = kvzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
+
+ nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size);
+
+ *list_size = req_list_size;
+ for (i = 0; i < req_list_size; i++) {
+ u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(addr_list[i], mac_addr);
+ }
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
+
+int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {};
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, list_type);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(curr_mac, addr_list[i]);
+ }
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
+
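+/* VLAN entries share the allowed-list area with MAC addresses, so they are
+ * written into current_uc_mac_address[] slots using the vlan_layout format.
+ */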
+int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u16 vlans[],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+ memset(out, 0, sizeof(out));
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]);
+ MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]);
+ }
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
+
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+ u64 *system_image_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
+ if (err)
+ goto out;
+
+ *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.system_image_guid);
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out);
+
+ *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.node_guid);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
+ u16 vport, u64 node_guid)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *nic_vport_context;
+ void *in;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EACCES;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.node_guid, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+ u16 *qkey_viol_cntr)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out);
+
+ *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.qkey_violation_counter);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
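+/* A gid_index of 0xffff sizes the output for the whole GID table. Querying
+ * another vport requires the vport_group_manager capability.
+ */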
+int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
+ u8 port_num, u16 vf_num, u16 gid_index,
+ union ib_gid *gid)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in);
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
+ int is_group_manager;
+ void *out = NULL;
+ void *in = NULL;
+ union ib_gid *tmp;
+ int tbsz;
+ int nout;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size));
+ mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n",
+ vf_num, gid_index, tbsz);
+
+ if (gid_index > tbsz && gid_index != 0xffff)
+ return -EINVAL;
+
+ if (gid_index == 0xffff)
+ nout = tbsz;
+ else
+ nout = 1;
+
+ out_sz += nout * sizeof(*gid);
+
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num);
+ MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1);
+ } else {
+ err = -EPERM;
+ goto out;
+ }
+ }
+ MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index);
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+ if (err)
+ goto out;
+
+ tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
+ gid->global.subnet_prefix = tmp->global.subnet_prefix;
+ gid->global.interface_id = tmp->global.interface_id;
+
+out:
+ kvfree(in);
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid);
+
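+/* A pkey_index of 0xffff returns the whole P_Key table into *pkey; as above,
+ * other-vport queries need the vport_group_manager capability.
+ */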
+int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
+ u8 port_num, u16 vf_num, u16 pkey_index,
+ u16 *pkey)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in);
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out);
+ int is_group_manager;
+ void *out = NULL;
+ void *in = NULL;
+ void *pkarr;
+ int nout;
+ int tbsz;
+ int err;
+ int i;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+
+ tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size));
+ if (pkey_index > tbsz && pkey_index != 0xffff)
+ return -EINVAL;
+
+ if (pkey_index == 0xffff)
+ nout = tbsz;
+ else
+ nout = 1;
+
+ out_sz += nout * MLX5_ST_SZ_BYTES(pkey);
+
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ out = kvzalloc(out_sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num);
+ MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1);
+ } else {
+ err = -EPERM;
+ goto out;
+ }
+ }
+ MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index);
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+ if (err)
+ goto out;
+
+ pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey);
+ for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey))
+ *pkey = MLX5_GET_PR(pkey, pkarr, pkey);
+
+out:
+ kvfree(in);
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey);
+
+int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ u16 vf_num,
+ struct mlx5_hca_vport_context *rep)
+{
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
+ int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {};
+ int is_group_manager;
+ void *out;
+ void *ctx;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+
+ out = kvzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT);
+
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_context_in, in, other_vport, 1);
+ MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num);
+ } else {
+ err = -EPERM;
+ goto ex;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_context_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec_inout(dev, query_hca_vport_context, in, out);
+ if (err)
+ goto ex;
+
+ ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context);
+ rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select);
+ rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware);
+ rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi);
+ rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw);
+ rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy);
+ rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx,
+ port_physical_state);
+ rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state);
+ rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx,
+ port_physical_state);
+ rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid);
+ rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid);
+ rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1);
+ rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx,
+ cap_mask1_field_select);
+ rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2);
+ rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx,
+ cap_mask2_field_select);
+ rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid);
+ rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx,
+ init_type_reply);
+ rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc);
+ rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx,
+ subnet_timeout);
+ rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid);
+ rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl);
+ rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx,
+ qkey_violation_counter);
+ rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx,
+ pkey_violation_counter);
+ rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required);
+ rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx,
+ system_image_guid);
+
+ex:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context);
+
+int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
+ u64 *sys_image_guid)
+{
+ struct mlx5_hca_vport_context *rep;
+ int err;
+
+ rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep);
+ if (!err)
+ *sys_image_guid = rep->sys_image_guid;
+
+ kvfree(rep);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid);
+
+int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
+ u64 *node_guid)
+{
+ struct mlx5_hca_vport_context *rep;
+ int err;
+
+ rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep);
+ if (!err)
+ *node_guid = rep->node_guid;
+
+ kvfree(rep);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
+
+int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ u16 vport,
+ int *promisc_uc,
+ int *promisc_mc,
+ int *promisc_all)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out);
+ if (err)
+ goto out;
+
+ *promisc_uc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_uc);
+ *promisc_mc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_mc);
+ *promisc_all = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_all);
+
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
+
+int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ int promisc_uc,
+ int promisc_mc,
+ int promisc_all)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_uc, promisc_uc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_mc, promisc_mc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_all, promisc_all);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+enum {
+ UC_LOCAL_LB,
+ MC_LOCAL_LB
+};
+
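+/* "enable" clears the disable_mc/uc_local_lb bits. Only bits the device can
+ * actually toggle (per the disable_local_lb_mc/uc caps) are selected.
+ */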
+int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) &&
+ !MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+ return 0;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_mc_local_lb, !enable);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_uc_local_lb, !enable);
+
+ if (MLX5_CAP_GEN(mdev, disable_local_lb_mc))
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_mc_local_lb, 1);
+
+ if (MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_uc_local_lb, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
+
+ if (!err)
+ mlx5_core_dbg(mdev, "%s local_lb\n",
+ enable ? "enable" : "disable");
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb);
+
+int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int value;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
+ if (err)
+ goto out;
+
+ value = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB;
+
+ value |= MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB;
+
+ *status = !value;
+
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
+
+enum mlx5_vport_roce_state {
+ MLX5_VPORT_ROCE_DISABLED = 0,
+ MLX5_VPORT_ROCE_ENABLED = 1,
+};
+
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+ enum mlx5_vport_roce_state state)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+ state);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
+
+ kvfree(in);
+
+ return err;
+}
+
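+/* roce.roce_en is a reference count protected by mlx5_roce_en_lock; the NIC
+ * vport RoCE state is only modified on the 0 <-> 1 transitions.
+ */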
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (!mdev->roce.roce_en)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+
+ if (!err)
+ mdev->roce.roce_en++;
+ mutex_unlock(&mlx5_roce_en_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (mdev->roce.roce_en) {
+ mdev->roce.roce_en--;
+ if (mdev->roce.roce_en == 0)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+
+ if (err)
+ mdev->roce.roce_en++;
+ }
+ mutex_unlock(&mlx5_roce_en_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_nic_vport_disable_roce);
+
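+/* When querying another vport, VF n is addressed as vport number n + 1
+ * (vport 0 is the PF itself).
+ */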
+int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
+ int vf, u8 port_num, void *out)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
+ int is_group_manager;
+ void *in;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1);
+ } else {
+ err = -EPERM;
+ goto free;
+ }
+ }
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_vport_counter_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec_inout(dev, query_vport_counter, in, out);
+free:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
+
+int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
+ u8 other_vport, u64 *rx_discard_vport_down,
+ u64 *tx_discard_vport_down)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport);
+ MLX5_SET(query_vnic_env_in, in, other_vport, other_vport);
+
+ err = mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out);
+ if (err)
+ return err;
+
+ *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.receive_discard_vport_down);
+ *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.transmit_discard_vport_down);
+ return 0;
+}
+
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ int vf,
+ struct mlx5_hca_vport_context *req)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
+ int is_group_manager;
+ void *ctx;
+ void *in;
+ int err;
+
+ mlx5_core_dbg(dev, "vf %d\n", vf);
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf);
+ } else {
+ err = -EPERM;
+ goto ex;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev, num_ports) > 1)
+ MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num);
+
+ ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context);
+ MLX5_SET(hca_vport_context, ctx, field_select, req->field_select);
+ if (req->field_select & MLX5_HCA_VPORT_SEL_STATE_POLICY)
+ MLX5_SET(hca_vport_context, ctx, vport_state_policy,
+ req->policy);
+ if (req->field_select & MLX5_HCA_VPORT_SEL_PORT_GUID)
+ MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
+ if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID)
+ MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select,
+ req->cap_mask1_perm);
+ err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in);
+ex:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
+
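+/* Enable RoCE on the slave port and affiliate its NIC vport with the master's
+ * vhca_id (the SW vhca_id when sw_vhca_id_valid is set). RoCE is rolled back
+ * if the affiliation command fails.
+ */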
+int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
+ struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5_nic_vport_enable_roce(port_mdev);
+ if (err)
+ goto free;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) {
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN_2(master_mdev, sw_vhca_id));
+ } else {
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN(master_mdev, vhca_id));
+ }
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria,
+ MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in);
+ if (err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+free:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport);
+
+int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id, 0);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria, 0);
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+ err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in);
+ if (!err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
+
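+/* The system image GUID is cached in mdev->sys_image_guid after the first
+ * successful query; the source depends on the port type (Ethernet vs. IB).
+ */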
+u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
+{
+ int port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ u64 tmp;
+ int err;
+
+ if (mdev->sys_image_guid)
+ return mdev->sys_image_guid;
+
+ if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
+ err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ else
+ err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
+
+ mdev->sys_image_guid = err ? 0 : tmp;
+
+ return mdev->sys_image_guid;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
+
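+/* Query the maximum (not currently enabled) general HCA caps of another
+ * function, identified by function_id.
+ */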
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
+{
+ u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ MLX5_SET(query_hca_cap_in, in, function_id, function_id);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
new file mode 100644
index 000000000..3091dd014
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "wq.h"
+#include "mlx5_core.h"
+
+int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_cyc *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+ u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
+ struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
+ int err;
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
+ wq->sz = mlx5_wq_cyc_get_size(wq);
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides)
+{
+ size_t len;
+ void *wqe;
+
+ if (!net_ratelimit())
+ return;
+
+ nstrides = max_t(u8, nstrides, 1);
+
+ len = nstrides << wq->fbc.log_stride;
+ wqe = mlx5_wq_cyc_get_wqe(wq, ix);
+
+ pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %zu\n",
+ mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len);
+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false);
+}
+
+void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq)
+{
+ wq->wqe_ctr = 0;
+ wq->cur_sz = 0;
+ mlx5_wq_cyc_update_db_record(wq);
+}
+
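+/* The RQ and SQ share a single fragmented buffer: the RQ comes first and the
+ * SQ starts either inside the RQ's page (when the RQ is smaller than a page)
+ * or on the first page after it.
+ */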
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+ u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size);
+ u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB);
+ u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size);
+
+ u32 rq_byte_size;
+ int err;
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev,
+ wq_get_byte_sz(log_rq_sz, log_rq_stride) +
+ wq_get_byte_sz(log_sq_sz, log_sq_stride),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc);
+
+ rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride);
+
+ if (rq_byte_size < PAGE_SIZE) {
+ /* SQ starts within the same page as the RQ */
+ u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB;
+
+ mlx5_init_fbc_offset(wq_ctrl->buf.frags,
+ log_sq_stride, log_sq_sz, sq_strides_offset,
+ &wq->sq.fbc);
+ } else {
+ u16 rq_npages = rq_byte_size >> PAGE_SHIFT;
+
+ mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages,
+ log_sq_stride, log_sq_sz, &wq->sq.fbc);
+ }
+
+ wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
+ wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *cqc, struct mlx5_cqwq *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */
+ u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7;
+ u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size);
+ int err;
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
+ &wq_ctrl->buf,
+ param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
+ err);
+ goto err_db_free;
+ }
+
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc);
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
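+/* Chain the WQEs through next_wqe_index and leave tail_next pointing at the
+ * last entry's link field so popped entries can be relinked at the tail.
+ */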
+static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq)
+{
+ struct mlx5_wqe_srq_next_seg *next_seg;
+ int i;
+
+ for (i = 0; i < wq->fbc.sz_m1; i++) {
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ next_seg->next_wqe_index = cpu_to_be16(i + 1);
+ }
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ wq->tail_next = &next_seg->next_wqe_index;
+}
+
+int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_ll *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+ u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
+ struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
+ int err;
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
+
+ mlx5_wq_ll_init_list(wq);
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq)
+{
+ wq->head = 0;
+ wq->wqe_ctr = 0;
+ wq->cur_sz = 0;
+ mlx5_wq_ll_init_list(wq);
+ mlx5_wq_ll_update_db_record(wq);
+}
+
+void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
+{
+ mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
+ mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
new file mode 100644
index 000000000..4d629e5dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_WQ_H__
+#define __MLX5_WQ_H__
+
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+struct mlx5_wq_param {
+ int buf_numa_node;
+ int db_numa_node;
+};
+
+struct mlx5_wq_ctrl {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_frag_buf buf;
+ struct mlx5_db db;
+};
+
+struct mlx5_wq_cyc {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u16 sz;
+ u16 wqe_ctr;
+ u16 cur_sz;
+};
+
+struct mlx5_wq_qp {
+ struct mlx5_wq_cyc rq;
+ struct mlx5_wq_cyc sq;
+};
+
+struct mlx5_cqwq {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u32 cc; /* consumer counter */
+};
+
+struct mlx5_wq_ll {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ __be16 *tail_next;
+ u16 head;
+ u16 wqe_ctr;
+ u16 cur_sz;
+};
+
+int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_cyc *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides);
+void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq);
+
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+
+int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *cqc, struct mlx5_cqwq *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+
+int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_ll *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq);
+
+void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
+
+static inline u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
+{
+ return (u32)wq->fbc.sz_m1 + 1;
+}
+
+static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq)
+{
+ return wq->cur_sz == wq->sz;
+}
+
+static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq)
+{
+ return wq->sz - wq->cur_sz;
+}
+
+static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq)
+{
+ return !wq->cur_sz;
+}
+
+static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq)
+{
+ wq->wqe_ctr++;
+ wq->cur_sz++;
+}
+
+static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n)
+{
+ wq->wqe_ctr += n;
+ wq->cur_sz += n;
+}
+
+static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq)
+{
+ wq->cur_sz--;
+}
+
+static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq)
+{
+ *wq->db = cpu_to_be32(wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
+{
+ return ctr & wq->fbc.sz_m1;
+}
+
+static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz);
+}
+
+static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
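+/* True when cc1 is strictly ahead of cc2, treating the 16-bit counters as
+ * wrapping (the sign bit of the difference detects wrap-around).
+ */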
+static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
+{
+ int equal = (cc1 == cc2);
+ int smaller = 0x8000 & (cc1 - cc2);
+
+ return !equal && !smaller;
+}
+
+static inline u16 mlx5_wq_cyc_get_counter(struct mlx5_wq_cyc *wq)
+{
+ return wq->wqe_ctr;
+}
+
+static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
+{
+ return wq->fbc.sz_m1 + 1;
+}
+
+static inline u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq)
+{
+ return wq->fbc.log_stride;
+}
+
+static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr)
+{
+ return ctr & wq->fbc.sz_m1;
+}
+
+static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_ctr2ix(wq, wq->cc);
+}
+
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+{
+ struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+
+ /* For 128B CQEs the data is in the last 64B */
+ cqe += wq->fbc.log_stride == 7;
+
+ return cqe;
+}
+
+static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)
+{
+ return ctr >> wq->fbc.log_sz;
+}
+
+static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_get_ctr_wrap_cnt(wq, wq->cc);
+}
+
+static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
+{
+ wq->cc++;
+}
+
+static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq)
+{
+ *wq->db = cpu_to_be32(wq->cc & 0xffffff);
+}
+
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
+{
+ u32 ci = mlx5_cqwq_get_ci(wq);
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+ u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
+ u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
+
+ if (cqe_ownership_bit != sw_ownership_val)
+ return NULL;
+
+ /* ensure cqe content is read after cqe ownership bit */
+ dma_rmb();
+
+ return cqe;
+}
+
+static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
+{
+ return (u32)wq->fbc.sz_m1 + 1;
+}
+
+static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
+{
+ return wq->cur_sz == wq->fbc.sz_m1;
+}
+
+static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq)
+{
+ return !wq->cur_sz;
+}
+
+static inline int mlx5_wq_ll_missing(struct mlx5_wq_ll *wq)
+{
+ return wq->fbc.sz_m1 - wq->cur_sz;
+}
+
+static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline u16 mlx5_wq_ll_get_wqe_next_ix(struct mlx5_wq_ll *wq, u16 ix)
+{
+ struct mlx5_wqe_srq_next_seg *wqe = mlx5_wq_ll_get_wqe(wq, ix);
+
+ return be16_to_cpu(wqe->next_wqe_index);
+}
+
+static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next)
+{
+ wq->head = head_next;
+ wq->wqe_ctr++;
+ wq->cur_sz++;
+}
+
+static inline void mlx5_wq_ll_pop(struct mlx5_wq_ll *wq, __be16 ix,
+ __be16 *next_tail_next)
+{
+ *wq->tail_next = ix;
+ wq->tail_next = next_tail_next;
+ wq->cur_sz--;
+}
+
+static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq)
+{
+ *wq->db = cpu_to_be32(wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_ll_get_head(struct mlx5_wq_ll *wq)
+{
+ return wq->head;
+}
+
+static inline u16 mlx5_wq_ll_get_counter(struct mlx5_wq_ll *wq)
+{
+ return wq->wqe_ctr;
+}
+
+#endif /* __MLX5_WQ_H__ */