summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
commit2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/net/ethernet/mellanox/mlx5/core/en
parentInitial commit. (diff)
downloadlinux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz
linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h204
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c615
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.c339
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.c722
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c159
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c263
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c215
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c151
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c1240
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c594
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c362
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c877
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h102
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c518
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c351
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c569
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c398
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c900
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c759
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c614
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c606
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.h50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c640
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h69
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c266
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c154
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c337
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c119
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c228
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c380
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c457
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c585
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c177
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c209
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c655
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c2272
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h220
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h218
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c991
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h120
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c1766
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c375
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c128
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c165
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.c203
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c331
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h494
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c691
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h186
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c230
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c311
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c191
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h15
110 files changed, 27131 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
new file mode 100644
index 000000000..48581ea3a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+ return chs->num;
+}
+
+static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix)
+{
+ WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs));
+ return chs->c[ix];
+}
+
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ *rqn = c->rq.rqn;
+}
+
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));
+
+ *rqn = c->xskrq.rqn;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+ struct mlx5e_ptp *c = chs->ptp;
+
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+ return false;
+
+ *rqn = c->rq.rqn;
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
new file mode 100644
index 000000000..637ca90da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
new file mode 100644
index 000000000..b59aee75d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5E_DCBNL_H__
+#define __MLX5E_DCBNL_H__
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+
+#define MLX5E_MAX_PRIORITY (8)
+
+struct mlx5e_cee_config {
+ /* bw pct for priority group */
+ u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
+ u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+ bool pfc_setting[CEE_DCBX_MAX_PRIO];
+ bool pfc_enable;
+};
+
+struct mlx5e_dcbx {
+ enum mlx5_dcbx_oper_mode mode;
+ struct mlx5e_cee_config cee_cfg; /* pending configuration */
+ u8 dscp_app_cnt;
+
+ /* The only setting that cannot be read from FW */
+ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
+ u8 cap;
+
+ /* Buffer configuration */
+ bool manual_buffer;
+ u32 cable_len;
+ u32 xoff;
+ u16 port_buff_cell_sz;
+};
+
+#define MLX5E_MAX_DSCP (64)
+
+struct mlx5e_dcbx_dp {
+ u8 dscp2prio[MLX5E_MAX_DSCP];
+ u8 trust_state;
+};
+
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
+#else
+static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
+static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_DCBNL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
new file mode 100644
index 000000000..b69f9d10c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "en/devlink.h"
+#include "eswitch.h"
+
+static void
+mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+ u64 parent_id;
+
+ parent_id = mlx5_query_nic_system_image_guid(dev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
+
+int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
+{
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
+ struct devlink_port_attrs attrs = {};
+ struct netdev_phys_item_id ppid = {};
+ struct devlink_port *dl_port;
+ unsigned int dl_port_index;
+ int ret;
+
+ if (mlx5_core_is_pf(priv->mdev)) {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = mlx5_get_dev_index(priv->mdev);
+ if (MLX5_ESWITCH_MANAGER(priv->mdev)) {
+ mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid);
+ memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+ attrs.switch_id.id_len = ppid.id_len;
+ }
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev,
+ MLX5_VPORT_UPLINK);
+ } else {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0);
+ }
+
+ dl_port = mlx5e_devlink_get_dl_port(priv);
+ memset(dl_port, 0, sizeof(*dl_port));
+ devlink_port_attrs_set(dl_port, &attrs);
+
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ ret = devl_port_register(devlink, dl_port, dl_port_index);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+
+ return ret;
+}
+
+void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+ devlink_port_type_eth_set(dl_port, priv->netdev);
+}
+
+void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
+
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ devl_port_unregister(dl_port);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+}
+
+struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (!netif_device_present(dev))
+ return NULL;
+
+ return mlx5e_devlink_get_dl_port(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
new file mode 100644
index 000000000..10b50feb9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_DEVLINK_H
+#define __MLX5E_EN_DEVLINK_H
+
+#include <net/devlink.h>
+#include "en.h"
+
+int mlx5e_devlink_port_register(struct mlx5e_priv *priv);
+void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv);
+void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv);
+struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev);
+
+static inline struct devlink_port *
+mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv)
+{
+ return &priv->mdev->mlx5e_res.dl_port;
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
new file mode 100644
index 000000000..bf2741eb7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5E_FLOW_STEER_H__
+#define __MLX5E_FLOW_STEER_H__
+
+#include "mod_hdr.h"
+#include "lib/fs_ttc.h"
+
+struct mlx5e_post_act;
+struct mlx5e_tc_table;
+
+enum {
+ MLX5E_TC_FT_LEVEL = 0,
+ MLX5E_TC_TTC_FT_LEVEL,
+ MLX5E_TC_MISS_LEVEL,
+};
+
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
+};
+
+struct mlx5e_flow_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+};
+
+struct mlx5e_l2_rule {
+ u8 addr[ETH_ALEN + 2];
+ struct mlx5_flow_handle *rule;
+};
+
+#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct mlx5e_promisc_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *rule;
+};
+
+/* Forward declaration and APIs to get private fields of vlan_table */
+struct mlx5e_vlan_table;
+unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan);
+struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan);
+
+struct mlx5e_l2_table {
+ struct mlx5e_flow_table ft;
+ struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct mlx5e_l2_rule broadcast;
+ struct mlx5e_l2_rule allmulti;
+ struct mlx5_flow_handle *trap_rule;
+ bool broadcast_enabled;
+ bool allmulti_enabled;
+ bool promisc_enabled;
+};
+
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
+/* NIC prio FTS */
+enum {
+ MLX5E_PROMISC_FT_LEVEL,
+ MLX5E_VLAN_FT_LEVEL,
+ MLX5E_L2_FT_LEVEL,
+ MLX5E_TTC_FT_LEVEL,
+ MLX5E_INNER_TTC_FT_LEVEL,
+ MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+ MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#ifdef CONFIG_MLX5_EN_TLS
+ MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#endif
+#ifdef CONFIG_MLX5_EN_ARFS
+ MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+ MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
+#endif
+};
+
+struct mlx5e_flow_steering;
+struct mlx5e_rx_res;
+
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables;
+
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple);
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple);
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs);
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs);
+int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id);
+#else
+static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
+{ return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+#endif
+
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp;
+#endif
+
+struct mlx5e_profile;
+struct mlx5e_fs_udp;
+struct mlx5e_fs_any;
+struct mlx5e_ptp_fs;
+
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params, bool tunnel);
+
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs);
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res);
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev);
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile);
+
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy);
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs);
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc);
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs);
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress);
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress);
+#ifdef CONFIG_MLX5_EN_RXNFC
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs);
+#endif
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner);
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner);
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs);
+#endif
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs);
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any);
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp);
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp);
+#endif
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy);
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable);
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs);
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs);
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+
+#define fs_err(fs, fmt, ...) \
+ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_dbg(fs, fmt, ...) \
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn(fs, fmt, ...) \
+ mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn_once(fs, fmt, ...) \
+ mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#endif /* __MLX5E_FLOW_STEER_H__ */
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
new file mode 100644
index 000000000..9e276fd3c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5E_FS_ETHTOOL_H__
+#define __MLX5E_FS_ETHTOOL_H__
+
+struct mlx5e_priv;
+struct mlx5e_ethtool_steering;
+#ifdef CONFIG_MLX5_EN_RXNFC
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool);
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool);
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs);
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{ return 0; }
+static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { }
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
new file mode 100644
index 000000000..671adbad0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "en/fs_tt_redirect.h"
+#include "fs_core.h"
+#include "mlx5_core.h"
+
+enum fs_udp_type {
+ FS_IPV4_UDP,
+ FS_IPV6_UDP,
+ FS_UDP_NUM_TYPES,
+};
+
+struct mlx5e_fs_udp {
+ struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES];
+ struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES];
+ int ref_cnt;
+};
+
+struct mlx5e_fs_any {
+ struct mlx5e_flow_table table;
+ struct mlx5_flow_handle *default_rule;
+ int ref_cnt;
+};
+
+static char *fs_udp_type2str(enum fs_udp_type i)
+{
+ switch (i) {
+ case FS_IPV4_UDP:
+ return "UDP v4";
+ default: /* FS_IPV6_UDP */
+ return "UDP v6";
+ }
+}
+
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
+{
+ switch (i) {
+ case FS_IPV4_UDP:
+ return MLX5_TT_IPV4_UDP;
+ default: /* FS_IPV6_UDP */
+ return MLX5_TT_IPV6_UDP;
+ }
+}
+
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
+{
+ switch (i) {
+ case MLX5_TT_IPV4_UDP:
+ return FS_IPV4_UDP;
+ case MLX5_TT_IPV6_UDP:
+ return FS_IPV6_UDP;
+ default:
+ return FS_UDP_NUM_TYPES;
+ }
+}
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type,
+ u16 udp_dport)
+{
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+ type == FS_IPV4_UDP ? 4 : 6);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport);
+}
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5_traffic_types ttc_type,
+ u32 tir_num, u16 d_port)
+{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ enum fs_udp_type type = tt2fs_udp(ttc_type);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (type == FS_UDP_NUM_TYPES)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ft = fs_udp->tables[type].t;
+
+ fs_udp_set_dport_flow(spec, type, d_port);
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add %s rule failed, err %d\n",
+ __func__, fs_udp_type2str(type), err);
+ }
+ return rule;
+}
+
+static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ struct mlx5e_flow_table *fs_udp_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ fs_udp_t = &fs_udp->tables[type];
+
+ dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type));
+ rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n",
+ __func__, type, err);
+ return err;
+ }
+
+ fs_udp->default_rules[type] = rule;
+ return 0;
+}
+
+#define MLX5E_FS_UDP_NUM_GROUPS (2)
+#define MLX5E_FS_UDP_GROUP1_SIZE (BIT(16))
+#define MLX5E_FS_UDP_GROUP2_SIZE (BIT(0))
+#define MLX5E_FS_UDP_TABLE_SIZE (MLX5E_FS_UDP_GROUP1_SIZE +\
+ MLX5E_FS_UDP_GROUP2_SIZE)
+static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ ft->g = NULL;
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
+
+ switch (type) {
+ case FS_IPV4_UDP:
+ case FS_IPV6_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ /* Match on udp protocol, Ipv4/6 and dport */
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_UDP_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_UDP_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+out:
+ kvfree(in);
+
+ return err;
+}
+
+static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
+ int err;
+
+ ft = &fs_udp->tables[type];
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
+ ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n",
+ fs_udp_type2str(type), ft->t->id, ft->t->level);
+
+ err = fs_udp_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = fs_udp_add_default_rule(fs, type);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i)
+{
+ if (IS_ERR_OR_NULL(fs_udp->tables[i].t))
+ return;
+
+ mlx5_del_flow_rules(fs_udp->default_rules[i]);
+ mlx5e_destroy_flow_table(&fs_udp->tables[i]);
+ fs_udp->tables[i].t = NULL;
+}
+
+static int fs_udp_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err, i;
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i));
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int fs_udp_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
+ struct mlx5_flow_destination dest = {};
+ int err, i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ dest.ft = udp->tables[i].t;
+
+ /* Modify ttc rules destination to point on the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest);
+ if (err) {
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
+ int i;
+
+ if (!fs_udp)
+ return;
+
+ if (--fs_udp->ref_cnt)
+ return;
+
+ fs_udp_disable(fs);
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++)
+ fs_udp_destroy_table(fs_udp, i);
+
+ kfree(fs_udp);
+ mlx5e_fs_set_udp(fs, NULL);
+}
+
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
+ int i, err;
+
+ if (udp) {
+ udp->ref_cnt++;
+ return 0;
+ }
+
+ udp = kzalloc(sizeof(*udp), GFP_KERNEL);
+ if (!udp)
+ return -ENOMEM;
+ mlx5e_fs_set_udp(fs, udp);
+
+ for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+ err = fs_udp_create_table(fs, i);
+ if (err)
+ goto err_destroy_tables;
+ }
+
+ err = fs_udp_enable(fs);
+ if (err)
+ goto err_destroy_tables;
+
+ udp->ref_cnt = 1;
+
+ return 0;
+
+err_destroy_tables:
+ while (--i >= 0)
+ fs_udp_destroy_table(udp, i);
+
+ kfree(udp);
+ mlx5e_fs_set_udp(fs, NULL);
+ return err;
+}
+
+static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type)
+{
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type);
+}
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
+ u32 tir_num, u16 ether_type)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_table *ft = NULL;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ft = fs_any->table.t;
+
+ fs_any_set_ethertype_flow(spec, ether_type);
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tir_num;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add ANY rule failed, err %d\n",
+ __func__, err);
+ }
+ return rule;
+}
+
+static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *fs_any_t;
+ struct mlx5_flow_destination dest;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ fs_any_t = &fs_any->table;
+ dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY);
+ rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n",
+ __func__, err);
+ return err;
+ }
+
+ fs_any->default_rule = rule;
+ return 0;
+}
+
+#define MLX5E_FS_ANY_NUM_GROUPS (2)
+#define MLX5E_FS_ANY_GROUP1_SIZE (BIT(16))
+#define MLX5E_FS_ANY_GROUP2_SIZE (BIT(0))
+#define MLX5E_FS_ANY_TABLE_SIZE (MLX5E_FS_ANY_GROUP1_SIZE +\
+ MLX5E_FS_ANY_GROUP2_SIZE)
+
+static int fs_any_create_groups(struct mlx5e_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
+ ft->g = NULL;
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ /* Match on ethertype */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_ANY_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Default Flow Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_FS_ANY_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static int fs_any_create_table(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *ft = &fs_any->table;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
+ ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n",
+ ft->t->id, ft->t->level);
+
+ err = fs_any_create_groups(ft);
+ if (err)
+ goto err;
+
+ err = fs_any_add_default_rule(fs);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static int fs_any_disable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ int err;
+
+ /* Modify ttc rules destination to point back to the indir TIRs */
+ err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY);
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
+ return err;
+ }
+ return 0;
+}
+
+static int fs_any_enable(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs);
+ struct mlx5_flow_destination dest = {};
+ int err;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = any->table.t;
+
+ /* Modify ttc rules destination to point on the accel_fs FTs */
+ err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest);
+ if (err) {
+ fs_err(fs,
+ "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
+ return err;
+ }
+ return 0;
+}
+
+static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any)
+{
+ if (IS_ERR_OR_NULL(fs_any->table.t))
+ return;
+
+ mlx5_del_flow_rules(fs_any->default_rule);
+ mlx5e_destroy_flow_table(&fs_any->table);
+ fs_any->table.t = NULL;
+}
+
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+
+ if (!fs_any)
+ return;
+
+ if (--fs_any->ref_cnt)
+ return;
+
+ fs_any_disable(fs);
+
+ fs_any_destroy_table(fs_any);
+
+ kfree(fs_any);
+ mlx5e_fs_set_any(fs, NULL);
+}
+
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ int err;
+
+ if (fs_any) {
+ fs_any->ref_cnt++;
+ return 0;
+ }
+
+ fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL);
+ if (!fs_any)
+ return -ENOMEM;
+ mlx5e_fs_set_any(fs, fs_any);
+
+ err = fs_any_create_table(fs);
+ if (err)
+ goto err_free_any;
+
+ err = fs_any_enable(fs);
+ if (err)
+ goto err_destroy_table;
+
+ fs_any->ref_cnt = 1;
+
+ return 0;
+
+err_destroy_table:
+ fs_any_destroy_table(fs_any);
+err_free_any:
+ mlx5e_fs_set_any(fs, NULL);
+ kfree(fs_any);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
new file mode 100644
index 000000000..5780fd7ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5E_FS_TT_REDIRECT_H__
+#define __MLX5E_FS_TT_REDIRECT_H__
+
+#include "en/fs.h"
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
+
+/* UDP traffic type redirect */
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
+ enum mlx5_traffic_types ttc_type,
+ u32 tir_num, u16 d_port);
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs);
+
+/* ANY traffic type redirect*/
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
+ u32 tir_num, u16 ether_type);
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
new file mode 100644
index 000000000..6f4e6c34b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+ u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
+ u8 hw_status;
+ void *cqc;
+ int err;
+
+ err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
+ if (err)
+ return err;
+
+ cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+ hw_status = MLX5_GET(cqc, cqc, status);
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+ u8 cq_log_stride;
+ u32 cq_sz;
+ int err;
+
+ cq_sz = mlx5_cqwq_get_size(&cq->wq);
+ cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
+{
+ mlx5e_reporter_tx_create(priv);
+ mlx5e_reporter_rx_create(priv);
+}
+
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
+{
+ mlx5e_reporter_rx_destroy(priv);
+ mlx5e_reporter_tx_destroy(priv);
+}
+
+void mlx5e_health_channels_update(struct mlx5e_priv *priv)
+{
+ if (priv->tx_reporter)
+ devlink_health_reporter_state_update(priv->tx_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+ if (priv->rx_reporter)
+ devlink_health_reporter_state_update(priv->rx_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+}
+
+int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
+{
+ struct mlx5e_modify_sq_param msp = {};
+ int err;
+
+ msp.curr_state = MLX5_SQC_STATE_ERR;
+ msp.next_state = MLX5_SQC_STATE_RST;
+
+ err = mlx5e_modify_sq(mdev, sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
+ return err;
+ }
+
+ memset(&msp, 0, sizeof(msp));
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+
+ err = mlx5e_modify_sq(mdev, sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
+{
+ int err = 0;
+
+ rtnl_lock();
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto out;
+
+ err = mlx5e_safe_reopen_channels(priv);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ rtnl_unlock();
+
+ return err;
+}
+
+int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
+ struct mlx5e_ch_stats *stats)
+{
+ u32 eqe_count;
+
+ netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+ eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+ eqe_count = mlx5_eq_poll_irq_disabled(eq);
+ if (!eqe_count)
+ return -EIO;
+
+ netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
+ eqe_count, eq->core.eqn);
+
+ stats->eq_rearm++;
+ return 0;
+}
+
+int mlx5e_health_report(struct mlx5e_priv *priv,
+ struct devlink_health_reporter *reporter, char *err_str,
+ struct mlx5e_err_ctx *err_ctx)
+{
+ netdev_err(priv->netdev, "%s\n", err_str);
+
+ if (!reporter)
+ return err_ctx->recover(err_ctx->ctx);
+
+ return devlink_health_report(reporter, err_str, err_ctx);
+}
+
+#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
+static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
+ const void *value, u32 value_len)
+
+{
+ u32 data_size;
+ int err = 0;
+ u32 offset;
+
+ for (offset = 0; offset < value_len; offset += data_size) {
+ data_size = value_len - offset;
+ if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
+ data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
+ err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct page *page;
+ int cmd_err, err;
+ int end_err;
+ int size;
+
+ if (IS_ERR_OR_NULL(mdev->rsc_dump))
+ return -EOPNOTSUPP;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
+ if (err)
+ goto free_page;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+
+ err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
+ if (err)
+ goto destroy_cmd;
+
+ } while (cmd_err > 0);
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+ end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
+ if (end_err)
+ err = end_err;
+free_page:
+ __free_page(page);
+ return err;
+}
+
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ int queue_idx, char *lbl)
+{
+ struct mlx5_rsc_key key = {};
+ int err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = queue_idx;
+ key.size = PAGE_SIZE;
+ key.num_of_obj1 = 1;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
+ if (err)
+ return err;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
new file mode 100644
index 000000000..0107e4e73
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_EN_HEALTH_H
+#define __MLX5E_EN_HEALTH_H
+
+#include "en.h"
+#include "diag/rsc_dump.h"
+
+static inline bool cqe_syndrome_needs_recover(u8 syndrome)
+{
+ return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+ syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+ syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+}
+
+void mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
+
+int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg);
+int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
+int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg);
+
+void mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
+
+#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
+
+struct mlx5e_err_ctx {
+ int (*recover)(void *ctx);
+ int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
+ void *ctx;
+};
+
+int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn);
+int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
+ struct mlx5e_ch_stats *stats);
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
+int mlx5e_health_report(struct mlx5e_priv *priv,
+ struct devlink_health_reporter *reporter, char *err_str,
+ struct mlx5e_err_ctx *err_ctx);
+void mlx5e_health_create_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_channels_update(struct mlx5e_priv *priv);
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+ struct devlink_fmsg *fmsg);
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ int queue_idx, char *lbl);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
new file mode 100644
index 000000000..09d441ecb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/pkt_cls.h>
+#include "htb.h"
+#include "en.h"
+#include "../qos.h"
+
+struct mlx5e_qos_node {
+ struct hlist_node hnode;
+ struct mlx5e_qos_node *parent;
+ u64 rate;
+ u32 bw_share;
+ u32 max_average_bw;
+ u32 hw_id;
+ u32 classid; /* 16-bit, except root. */
+ u16 qid;
+};
+
+struct mlx5e_htb {
+ DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
+ DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ struct mlx5e_selq *selq;
+};
+
+#define MLX5E_QOS_QID_INNER 0xffff
+#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
+
+/* Software representation of the QoS tree */
+
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt, err;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode) {
+ if (node->qid == MLX5E_QOS_QID_INNER)
+ continue;
+ err = callback(data, node->qid, node->hw_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb)
+{
+ int last;
+
+ last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev));
+ return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1;
+}
+
+static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb)
+{
+ int size = mlx5e_qos_max_leaf_nodes(htb->mdev);
+ struct mlx5e_priv *priv = htb->priv;
+ int res;
+
+ WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
+ res = find_first_zero_bit(htb->qos_used_qids, size);
+
+ return res == size ? -ENOSPC : res;
+}
+
+static struct mlx5e_qos_node *
+mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid,
+ struct mlx5e_qos_node *parent)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->parent = parent;
+
+ node->qid = qid;
+ __set_bit(qid, htb->qos_used_qids);
+
+ node->classid = classid;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, classid);
+
+ mlx5e_update_tx_netdev_queues(htb->priv);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->qid = MLX5E_QOS_QID_INNER;
+ node->classid = MLX5E_HTB_CLASSID_ROOT;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node)
+{
+ hash_del_rcu(&node->hnode);
+ if (node->qid != MLX5E_QOS_QID_INNER) {
+ __clear_bit(node->qid, htb->qos_used_qids);
+ mlx5e_update_tx_netdev_queues(htb->priv);
+ }
+ /* Make sure this qid is no longer selected by mlx5e_select_queue, so
+ * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue.
+ */
+ synchronize_net();
+ kfree(node);
+}
+
+/* TX datapath API */
+
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid)
+{
+ struct mlx5e_qos_node *node;
+ u16 qid;
+ int res;
+
+ rcu_read_lock();
+
+ node = mlx5e_htb_node_find_rcu(htb, classid);
+ if (!node) {
+ res = -ENOENT;
+ goto out;
+ }
+ qid = READ_ONCE(node->qid);
+ if (qid == MLX5E_QOS_QID_INNER) {
+ res = -EINVAL;
+ goto out;
+ }
+ res = mlx5e_qid_from_qos(&htb->priv->channels, qid);
+
+out:
+ rcu_read_unlock();
+ return res;
+}
+
+/* HTB TC handlers */
+
+static int
+mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
+
+ mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls);
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ err = mlx5e_qos_alloc_queues(priv, &priv->channels);
+ if (err)
+ goto err_cancel_selq;
+ }
+
+ root = mlx5e_htb_node_create_root(htb);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto err_free_queues;
+ }
+
+ err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
+ goto err_sw_node_delete;
+ }
+
+ mlx5e_selq_apply(htb->selq);
+
+ return 0;
+
+err_sw_node_delete:
+ mlx5e_htb_node_delete(htb, root);
+
+err_free_queues:
+ if (opened)
+ mlx5e_qos_close_all_queues(&priv->channels);
+err_cancel_selq:
+ mlx5e_selq_cancel(htb->selq);
+ return err;
+}
+
+static int mlx5e_htb_root_del(struct mlx5e_htb *htb)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_DESTROY\n");
+
+ /* Wait until real_num_tx_queues is updated for mlx5e_select_queue,
+ * so that we can safely switch to its non-HTB non-PTP fastpath.
+ */
+ synchronize_net();
+
+ mlx5e_selq_prepare_htb(htb->selq, 0, 0);
+ mlx5e_selq_apply(htb->selq);
+
+ root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT);
+ if (!root) {
+ qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n");
+ return -ENOENT;
+ }
+ err = mlx5_qos_destroy_node(htb->mdev, root->hw_id);
+ if (err)
+ qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n",
+ root->hw_id, err);
+ mlx5e_htb_node_delete(htb, root);
+
+ mlx5e_qos_deactivate_all_queues(&priv->channels);
+ mlx5e_qos_close_all_queues(&priv->channels);
+
+ return err;
+}
+
+static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate,
+ struct mlx5e_qos_node *parent, u32 *bw_share)
+{
+ u64 share = 0;
+
+ while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
+ parent = parent->parent;
+
+ if (parent->max_average_bw)
+ share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
+ parent->max_average_bw);
+ else
+ share = 101;
+
+ *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
+
+ qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
+ rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
+
+ return 0;
+}
+
+static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw)
+{
+ /* Hardware treats 0 as "unlimited", set at least 1. */
+ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
+}
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ int qid;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
+ classid, parent_classid, rate, ceil);
+
+ qid = mlx5e_htb_find_unused_qos_qid(htb);
+ if (qid < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached.");
+ return qid;
+ }
+
+ parent = mlx5e_htb_node_find(htb, parent_classid);
+ if (!parent)
+ return -EINVAL;
+
+ node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ mlx5e_htb_node_delete(htb, node);
+ return err;
+ }
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ return mlx5e_qid_from_qos(&priv->channels, node->qid);
+}
+
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *child;
+ struct mlx5e_priv *priv = htb->priv;
+ int err, tmp_err;
+ u32 new_hw_id;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
+ classid, child_classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
+ qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ /* Intentionally reuse the qid for the upcoming first child. */
+ child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto err_destroy_hw_node;
+ }
+
+ child->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share,
+ child->max_average_bw, &child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ goto err_delete_sw_node;
+ }
+
+ /* No fail point. */
+
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, child->qid, child->hw_id);
+ }
+ }
+
+ return 0;
+
+err_delete_sw_node:
+ child->qid = MLX5E_QOS_QID_INNER;
+ mlx5e_htb_node_delete(htb, child);
+
+err_destroy_hw_node:
+ tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id);
+ if (tmp_err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
+ new_hw_id, classid, tmp_err);
+ return err;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode)
+ if (node->qid == qid)
+ break;
+
+ return node;
+}
+
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *node;
+ struct netdev_queue *txq;
+ u16 qid, moved_qid;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
+
+ node = mlx5e_htb_node_find(htb, *classid);
+ if (!node)
+ return -ENOENT;
+
+ /* Store qid for reuse. */
+ qid = node->qid;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, qid));
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, *classid, err);
+
+ mlx5e_htb_node_delete(htb, node);
+
+ moved_qid = mlx5e_htb_cur_leaf_nodes(htb);
+
+ if (moved_qid == 0) {
+ /* The last QoS SQ was just destroyed. */
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+ moved_qid--;
+
+ if (moved_qid < qid) {
+ /* The highest QoS SQ was just destroyed. */
+ WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u",
+ qid, moved_qid);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+
+ WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
+ qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
+
+ node = mlx5e_htb_node_find_by_qid(htb, moved_qid);
+ WARN(!node, "Could not find a node with qid %u to move to queue %u",
+ moved_qid, qid);
+
+ /* Stop traffic to the old queue. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+ __clear_bit(moved_qid, priv->htb->qos_used_qids);
+
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, moved_qid));
+ mlx5e_deactivate_qos_sq(priv, moved_qid);
+ mlx5e_close_qos_sq(priv, moved_qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, moved_qid);
+
+ __set_bit(qid, htb->qos_used_qids);
+ WRITE_ONCE(node->qid, qid);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
+ node->classid, moved_qid, qid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ mlx5e_update_tx_netdev_queues(priv);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
+
+ *classid = node->classid;
+ return 0;
+}
+
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ u32 old_hw_id, new_hw_id;
+ int err, saved_err = 0;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
+ force ? "_FORCE" : "", classid);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id,
+ node->parent->bw_share,
+ node->parent->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ if (!force)
+ return err;
+ saved_err = err;
+ }
+
+ /* Store qid for reuse and prevent clearing the bit. */
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, qid);
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ parent = node->parent;
+ mlx5e_htb_node_delete(htb, node);
+
+ node = parent;
+ WRITE_ONCE(node->qid, qid);
+
+ /* Early return on error in force mode. Parent will still be an inner
+ * node to be deleted by a following delete operation.
+ */
+ if (saved_err)
+ return saved_err;
+
+ old_hw_id = node->hw_id;
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, old_hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ return 0;
+}
+
+static int
+mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *child;
+ int err = 0;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, child, hnode) {
+ u32 old_bw_share = child->bw_share;
+ int err_one;
+
+ if (child->parent != node)
+ continue;
+
+ mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share);
+ if (child->bw_share == old_bw_share)
+ continue;
+
+ err_one = mlx5_qos_update_node(htb->mdev, child->bw_share,
+ child->max_average_bw, child->hw_id);
+ if (!err && err_one) {
+ err = err_one;
+
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
+ qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n",
+ node->classid, err);
+ }
+ }
+
+ return err;
+}
+
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ u32 bw_share, max_average_bw;
+ struct mlx5e_qos_node *node;
+ bool ceil_changed = false;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
+ classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw);
+
+ err = mlx5_qos_update_node(htb->mdev, bw_share,
+ max_average_bw, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
+ qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ if (max_average_bw != node->max_average_bw)
+ ceil_changed = true;
+
+ node->bw_share = bw_share;
+ node->max_average_bw = max_average_bw;
+
+ if (ceil_changed)
+ err = mlx5e_htb_update_children(htb, node, extack);
+
+ return err;
+}
+
+struct mlx5e_htb *mlx5e_htb_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL);
+}
+
+void mlx5e_htb_free(struct mlx5e_htb *htb)
+{
+ kvfree(htb);
+}
+
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv)
+{
+ htb->mdev = mdev;
+ htb->netdev = netdev;
+ htb->selq = selq;
+ htb->priv = priv;
+ hash_init(htb->qos_tc2node);
+ return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->extack);
+}
+
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb)
+{
+ mlx5e_htb_root_del(htb);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
new file mode 100644
index 000000000..8386f1ea4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5E_EN_HTB_H_
+#define __MLX5E_EN_HTB_H_
+
+#include "qos.h"
+
+#define MLX5E_QOS_MAX_LEAF_NODES 256
+
+struct mlx5e_selq;
+struct mlx5e_htb;
+
+typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id);
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data);
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb);
+
+/* TX datapath API */
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid);
+
+/* HTB TC handlers */
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+struct mlx5e_htb *mlx5e_htb_alloc(void);
+void mlx5e_htb_free(struct mlx5e_htb *htb);
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv);
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb);
+#endif
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 000000000..b4f3bd7d3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+ struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+ struct mlx5e_channel_stats *stats;
+ int tc;
+
+ stats = priv->channel_stats[ch];
+ data->rx_packets = stats->rq.packets;
+ data->rx_bytes = stats->rq.bytes;
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++) {
+ data->tx_packets += stats->sq[tc].packets;
+ data->tx_bytes += stats->sq[tc].bytes;
+ }
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data,
+ int buf_len)
+{
+ int ch, i = 0;
+
+ for (ch = 0; ch < priv->stats_nch; ch++) {
+ void *buf = data + i;
+
+ if (WARN_ON_ONCE(buf +
+ sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+ data + buf_len))
+ return;
+
+ mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf);
+ i += sizeof(struct mlx5e_hv_vhca_per_ring_stats);
+ }
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+ return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+ priv->stats_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work *dwork;
+ struct mlx5e_priv *priv;
+ int buf_len, rc;
+ void *buf;
+
+ dwork = to_delayed_work(work);
+ sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+ priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+ buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ agent = sagent->agent;
+ buf = sagent->buf;
+
+ memset(buf, 0, buf_len);
+ mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+ rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+ if (rc) {
+ mlx5_core_err(priv->mdev,
+ "%s: Failed to write stats, err = %d\n",
+ __func__, rc);
+ return;
+ }
+
+ if (sagent->delay)
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+ MLX5_HV_VHCA_STATS_VERSION = 1,
+ MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5e_priv *priv;
+
+ priv = mlx5_hv_vhca_agent_priv(agent);
+ sagent = &priv->stats_agent;
+
+ block->version = MLX5_HV_VHCA_STATS_VERSION;
+ block->rings = priv->stats_nch;
+
+ if (!block->command) {
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+ return;
+ }
+
+ sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+ msecs_to_jiffies(block->command * 100);
+
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ struct mlx5_hv_vhca_agent *agent;
+
+ priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+ if (!priv->stats_agent.buf)
+ return;
+
+ agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+ MLX5_HV_VHCA_AGENT_STATS,
+ mlx5e_hv_vhca_stats_control, NULL,
+ mlx5e_hv_vhca_stats_cleanup,
+ priv);
+
+ if (IS_ERR_OR_NULL(agent)) {
+ if (IS_ERR(agent))
+ netdev_warn(priv->netdev,
+ "Failed to create hv vhca stats agent, err = %ld\n",
+ PTR_ERR(agent));
+
+ kvfree(priv->stats_agent.buf);
+ return;
+ }
+
+ priv->stats_agent.agent = agent;
+ INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+ if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+ return;
+
+ mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+ kvfree(priv->stats_agent.buf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 000000000..29c8c6d32
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
new file mode 100644
index 000000000..4e72ca807
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <linux/hashtable.h>
+#include <linux/refcount.h>
+
+#include "mapping.h"
+
+#define MAPPING_GRACE_PERIOD 2000
+
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
+struct mapping_ctx {
+ struct xarray xarray;
+ DECLARE_HASHTABLE(ht, 8);
+ struct mutex lock; /* Guards hashtable and xarray */
+ unsigned long max_id;
+ size_t data_size;
+ bool delayed_removal;
+ struct delayed_work dwork;
+ struct list_head pending_list;
+ spinlock_t pending_list_lock; /* Guards pending list */
+ u64 id;
+ u8 type;
+ struct list_head list;
+ refcount_t refcount;
+};
+
+struct mapping_item {
+ struct rcu_head rcu;
+ struct list_head list;
+ unsigned long timeout;
+ struct hlist_node node;
+ int cnt;
+ u32 id;
+ char data[];
+};
+
+int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id)
+{
+ struct mapping_item *mi;
+ int err = -ENOMEM;
+ u32 hash_key;
+
+ mutex_lock(&ctx->lock);
+
+ hash_key = jhash(data, ctx->data_size, 0);
+ hash_for_each_possible(ctx->ht, mi, node, hash_key) {
+ if (!memcmp(data, mi->data, ctx->data_size))
+ goto attach;
+ }
+
+ mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL);
+ if (!mi)
+ goto err_alloc;
+
+ memcpy(mi->data, data, ctx->data_size);
+ hash_add(ctx->ht, &mi->node, hash_key);
+
+ err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id),
+ GFP_KERNEL);
+ if (err)
+ goto err_assign;
+attach:
+ ++mi->cnt;
+ *id = mi->id;
+
+ mutex_unlock(&ctx->lock);
+
+ return 0;
+
+err_assign:
+ hash_del(&mi->node);
+ kfree(mi);
+err_alloc:
+ mutex_unlock(&ctx->lock);
+
+ return err;
+}
+
+static void mapping_remove_and_free(struct mapping_ctx *ctx,
+ struct mapping_item *mi)
+{
+ xa_erase(&ctx->xarray, mi->id);
+ kfree_rcu(mi, rcu);
+}
+
+static void mapping_free_item(struct mapping_ctx *ctx,
+ struct mapping_item *mi)
+{
+ if (!ctx->delayed_removal) {
+ mapping_remove_and_free(ctx, mi);
+ return;
+ }
+
+ mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD);
+
+ spin_lock(&ctx->pending_list_lock);
+ list_add_tail(&mi->list, &ctx->pending_list);
+ spin_unlock(&ctx->pending_list_lock);
+
+ schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD);
+}
+
+int mapping_remove(struct mapping_ctx *ctx, u32 id)
+{
+ unsigned long index = id;
+ struct mapping_item *mi;
+ int err = -ENOENT;
+
+ mutex_lock(&ctx->lock);
+ mi = xa_load(&ctx->xarray, index);
+ if (!mi)
+ goto out;
+ err = 0;
+
+ if (--mi->cnt > 0)
+ goto out;
+
+ hash_del(&mi->node);
+ mapping_free_item(ctx, mi);
+out:
+ mutex_unlock(&ctx->lock);
+
+ return err;
+}
+
+int mapping_find(struct mapping_ctx *ctx, u32 id, void *data)
+{
+ unsigned long index = id;
+ struct mapping_item *mi;
+ int err = -ENOENT;
+
+ rcu_read_lock();
+ mi = xa_load(&ctx->xarray, index);
+ if (!mi)
+ goto err_find;
+
+ memcpy(data, mi->data, ctx->data_size);
+ err = 0;
+
+err_find:
+ rcu_read_unlock();
+ return err;
+}
+
+static void
+mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list)
+{
+ struct mapping_item *mi;
+
+ list_for_each_entry(mi, list, list)
+ mapping_remove_and_free(ctx, mi);
+}
+
+static void mapping_work_handler(struct work_struct *work)
+{
+ unsigned long min_timeout = 0, now = jiffies;
+ struct mapping_item *mi, *next;
+ LIST_HEAD(pending_items);
+ struct mapping_ctx *ctx;
+
+ ctx = container_of(work, struct mapping_ctx, dwork.work);
+
+ spin_lock(&ctx->pending_list_lock);
+ list_for_each_entry_safe(mi, next, &ctx->pending_list, list) {
+ if (time_after(now, mi->timeout))
+ list_move(&mi->list, &pending_items);
+ else if (!min_timeout ||
+ time_before(mi->timeout, min_timeout))
+ min_timeout = mi->timeout;
+ }
+ spin_unlock(&ctx->pending_list_lock);
+
+ mapping_remove_and_free_list(ctx, &pending_items);
+
+ if (min_timeout)
+ schedule_delayed_work(&ctx->dwork, abs(min_timeout - now));
+}
+
+static void mapping_flush_work(struct mapping_ctx *ctx)
+{
+ if (!ctx->delayed_removal)
+ return;
+
+ cancel_delayed_work_sync(&ctx->dwork);
+ mapping_remove_and_free_list(ctx, &ctx->pending_list);
+}
+
+struct mapping_ctx *
+mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ ctx->max_id = max_id ? max_id : UINT_MAX;
+ ctx->data_size = data_size;
+
+ if (delayed_removal) {
+ INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler);
+ INIT_LIST_HEAD(&ctx->pending_list);
+ spin_lock_init(&ctx->pending_list_lock);
+ ctx->delayed_removal = true;
+ }
+
+ mutex_init(&ctx->lock);
+ xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
+
+ refcount_set(&ctx->refcount, 1);
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ mutex_lock(&shared_ctx_lock);
+ list_for_each_entry(ctx, &shared_ctx_list, list) {
+ if (ctx->id == id && ctx->type == type) {
+ if (refcount_inc_not_zero(&ctx->refcount))
+ goto unlock;
+ break;
+ }
+ }
+
+ ctx = mapping_create(data_size, max_id, delayed_removal);
+ if (IS_ERR(ctx))
+ goto unlock;
+
+ ctx->id = id;
+ ctx->type = type;
+ list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+ mutex_unlock(&shared_ctx_lock);
+ return ctx;
+}
+
+void mapping_destroy(struct mapping_ctx *ctx)
+{
+ if (!refcount_dec_and_test(&ctx->refcount))
+ return;
+
+ mutex_lock(&shared_ctx_lock);
+ list_del(&ctx->list);
+ mutex_unlock(&shared_ctx_lock);
+
+ mapping_flush_work(ctx);
+ xa_destroy(&ctx->xarray);
+ mutex_destroy(&ctx->lock);
+
+ kfree(ctx);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
new file mode 100644
index 000000000..4e2119f0f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#ifndef __MLX5_MAPPING_H__
+#define __MLX5_MAPPING_H__
+
+struct mapping_ctx;
+
+int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id);
+int mapping_remove(struct mapping_ctx *ctx, u32 id);
+int mapping_find(struct mapping_ctx *ctx, u32 id, void *data);
+
+/* mapping uses an xarray to map data to ids in add(), and for find().
+ * For locking, it uses a internal xarray spin lock for add()/remove(),
+ * find() uses rcu_read_lock().
+ * Choosing delayed_removal postpones the removal of a previously mapped
+ * id by MAPPING_GRACE_PERIOD milliseconds.
+ * This is to avoid races against hardware, where we mark the packet in
+ * hardware with a previous id, and quick remove() and add() reusing the same
+ * previous id. Then find() will get the new mapping instead of the old
+ * which was used to mark the packet.
+ */
+struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
+ bool delayed_removal);
+void mapping_destroy(struct mapping_ctx *ctx);
+
+/* adds mapping with an id or get an existing mapping with the same id
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
+#endif /* __MLX5_MAPPING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
new file mode 100644
index 000000000..17325c5d6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies
+
+#include <linux/jhash.h>
+#include "mod_hdr.h"
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
+struct mod_hdr_key {
+ int num_actions;
+ void *actions;
+};
+
+struct mlx5e_mod_hdr_handle {
+ /* a node of a hash table which keeps all the mod_hdr entries */
+ struct hlist_node mod_hdr_hlist;
+
+ struct mod_hdr_key key;
+
+ struct mlx5_modify_hdr *modify_hdr;
+
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+};
+
+static u32 hash_mod_hdr_info(struct mod_hdr_key *key)
+{
+ return jhash(key->actions,
+ key->num_actions * MLX5_MH_ACT_SZ, 0);
+}
+
+static int cmp_mod_hdr_info(struct mod_hdr_key *a, struct mod_hdr_key *b)
+{
+ if (a->num_actions != b->num_actions)
+ return 1;
+
+ return memcmp(a->actions, b->actions,
+ a->num_actions * MLX5_MH_ACT_SZ);
+}
+
+void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl)
+{
+ mutex_init(&tbl->lock);
+ hash_init(tbl->hlist);
+}
+
+void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl)
+{
+ mutex_destroy(&tbl->lock);
+}
+
+static struct mlx5e_mod_hdr_handle *mod_hdr_get(struct mod_hdr_tbl *tbl,
+ struct mod_hdr_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_mod_hdr_handle *mh, *found = NULL;
+
+ hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, key)) {
+ refcount_inc(&mh->refcnt);
+ found = mh;
+ break;
+ }
+ }
+
+ return found;
+}
+
+struct mlx5e_mod_hdr_handle *
+mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int num_actions, actions_size, err;
+ struct mlx5e_mod_hdr_handle *mh;
+ struct mod_hdr_key key;
+ u32 hash_key;
+
+ num_actions = mod_hdr_acts->num_actions;
+ actions_size = MLX5_MH_ACT_SZ * num_actions;
+
+ key.actions = mod_hdr_acts->actions;
+ key.num_actions = num_actions;
+
+ hash_key = hash_mod_hdr_info(&key);
+
+ mutex_lock(&tbl->lock);
+ mh = mod_hdr_get(tbl, &key, hash_key);
+ if (mh) {
+ mutex_unlock(&tbl->lock);
+ wait_for_completion(&mh->res_ready);
+
+ if (mh->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto attach_header_err;
+ }
+ goto attach_header;
+ }
+
+ mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
+ if (!mh) {
+ mutex_unlock(&tbl->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mh->key.actions = (void *)mh + sizeof(*mh);
+ memcpy(mh->key.actions, key.actions, actions_size);
+ mh->key.num_actions = num_actions;
+ refcount_set(&mh->refcnt, 1);
+ init_completion(&mh->res_ready);
+
+ hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
+ mutex_unlock(&tbl->lock);
+
+ mh->modify_hdr = mlx5_modify_header_alloc(mdev, namespace,
+ mh->key.num_actions,
+ mh->key.actions);
+ if (IS_ERR(mh->modify_hdr)) {
+ err = PTR_ERR(mh->modify_hdr);
+ mh->compl_result = err;
+ goto alloc_header_err;
+ }
+ mh->compl_result = 1;
+ complete_all(&mh->res_ready);
+
+attach_header:
+ return mh;
+
+alloc_header_err:
+ complete_all(&mh->res_ready);
+attach_header_err:
+ mlx5e_mod_hdr_detach(mdev, tbl, mh);
+ return ERR_PTR(err);
+}
+
+void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ struct mlx5e_mod_hdr_handle *mh)
+{
+ if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
+ return;
+ hash_del(&mh->mod_hdr_hlist);
+ mutex_unlock(&tbl->lock);
+
+ if (mh->compl_result > 0)
+ mlx5_modify_header_dealloc(mdev, mh->modify_hdr);
+
+ kfree(mh);
+}
+
+struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh)
+{
+ return mh->modify_hdr;
+}
+
+char *
+mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int new_num_actions, max_hw_actions;
+ size_t new_sz, old_sz;
+ void *ret;
+
+ if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
+ goto out;
+
+ max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace);
+ new_num_actions = min(max_hw_actions,
+ mod_hdr_acts->actions ?
+ mod_hdr_acts->max_actions * 2 : 1);
+ if (mod_hdr_acts->max_actions == new_num_actions)
+ return ERR_PTR(-ENOSPC);
+
+ new_sz = MLX5_MH_ACT_SZ * new_num_actions;
+ old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ;
+
+ if (mod_hdr_acts->is_static) {
+ ret = kzalloc(new_sz, GFP_KERNEL);
+ if (ret) {
+ memcpy(ret, mod_hdr_acts->actions, old_sz);
+ mod_hdr_acts->is_static = false;
+ }
+ } else {
+ ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
+ if (ret)
+ memset(ret + old_sz, 0, new_sz - old_sz);
+ }
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ mod_hdr_acts->actions = ret;
+ mod_hdr_acts->max_actions = new_num_actions;
+
+out:
+ return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
+}
+
+void
+mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ if (!mod_hdr_acts->is_static)
+ kfree(mod_hdr_acts->actions);
+
+ mod_hdr_acts->actions = NULL;
+ mod_hdr_acts->num_actions = 0;
+ mod_hdr_acts->max_actions = 0;
+}
+
+char *
+mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos)
+{
+ return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
new file mode 100644
index 000000000..b8dac418d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies */
+
+#ifndef __MLX5E_EN_MOD_HDR_H__
+#define __MLX5E_EN_MOD_HDR_H__
+
+#include <linux/hashtable.h>
+#include <linux/mlx5/fs.h>
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
+struct mlx5e_mod_hdr_handle;
+
+struct mlx5e_tc_mod_hdr_acts {
+ int num_actions;
+ int max_actions;
+ bool is_static;
+ void *actions;
+};
+
+#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \
+ u8 name[len][MLX5_MH_ACT_SZ] = {}
+
+#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \
+ struct mlx5e_tc_mod_hdr_acts name = { \
+ .max_actions = ARRAY_SIZE(acts_arr), \
+ .is_static = true, \
+ .actions = acts_arr, \
+ }
+
+char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos);
+
+struct mlx5e_mod_hdr_handle *
+mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev,
+ struct mod_hdr_tbl *tbl,
+ struct mlx5e_mod_hdr_handle *mh);
+struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh);
+
+void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl);
+void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl);
+
+static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace)
+{
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
+#endif /* __MLX5E_EN_MOD_HDR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
new file mode 100644
index 000000000..254c84739
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include "en.h"
+#include "monitor_stats.h"
+#include "lib/eq.h"
+
+/* Driver will set the following watch counters list:
+ * Ppcnt.802_3:
+ * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A
+ * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A
+ * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A
+ * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A
+ * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A
+ * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A
+ * Q_Counters:
+ * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix
+ */
+
+#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1
+#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters))
+ return false;
+ if (MLX5_CAP_PCAM_REG(mdev, ppcnt) &&
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) <
+ NUM_REQ_PPCNT_COUNTER_S1)
+ return false;
+ if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) <
+ NUM_REQ_Q_COUNTERS_S1)
+ return false;
+ return true;
+}
+
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
+
+ MLX5_SET(arm_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_ARM_MONITOR_COUNTER);
+ mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in);
+}
+
+static void mlx5e_monitor_counters_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ monitor_counters_work);
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_stats_update_ndo_stats(priv);
+ mutex_unlock(&priv->state_lock);
+ mlx5e_monitor_counter_arm(priv);
+}
+
+static int mlx5e_monitor_event_handler(struct notifier_block *nb,
+ unsigned long event, void *eqe)
+{
+ struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv,
+ monitor_counters_nb);
+ queue_work(priv->wq, &priv->monitor_counters_work);
+ return NOTIFY_OK;
+}
+
+static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
+{
+ enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
+
+ for (ppcnt_cnt = 0;
+ ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1;
+ ppcnt_cnt++, cnt++) {
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ ppcnt_cnt);
+ }
+ return ppcnt_cnt;
+}
+
+static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in)
+{
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter_group_id,
+ q_counter);
+ return 1;
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters);
+ int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
+ int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ int q_counter = priv->q_counter;
+ int cnt = 0;
+
+ if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 &&
+ max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt))
+ cnt += fill_monitor_counter_ppcnt_set1(cnt, in);
+
+ if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 &&
+ max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) &&
+ q_counter)
+ cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in);
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec_in(mdev, set_monitor_counter, in);
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
+{
+ INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
+ MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+ MONITOR_COUNTER);
+ mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+
+ mlx5e_set_monitor_counter(priv);
+ mlx5e_monitor_counter_arm(priv);
+ queue_work(priv->wq, &priv->update_stats_work);
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in);
+ mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ cancel_work_sync(&priv->monitor_counters_work);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
new file mode 100644
index 000000000..e1ac4b3d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_MONITOR_H__
+#define __MLX5_MONITOR_H__
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_MONITOR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
new file mode 100644
index 000000000..d3de1b7a8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -0,0 +1,1240 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "en/params.h"
+#include "en/txrx.h"
+#include "en/port.h"
+#include "en_accel/en_accel.h"
+#include "en_accel/ipsec.h"
+#include <net/xdp_sock_drv.h>
+
+static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
+{
+ u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size);
+
+ return min_page_shift ? : 12;
+}
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
+ u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);
+
+ /* Regular RQ uses order-0 pages, the NIC must be able to map them. */
+ if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
+ min_page_shift = req_page_shift;
+
+ return max(req_page_shift, min_page_shift);
+}
+
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ /* Different memory management schemes use different mechanisms to map
+ * user-mode memory. The stricter guarantees we have, the faster
+ * mechanisms we use:
+ * 1. MTT - direct mapping in page granularity.
+ * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but
+ * all mappings have the same size.
+ * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and
+ * mappings can have different sizes.
+ */
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ bool oversized = false;
+
+ if (xsk) {
+ oversized = xsk->chunk_size < (1 << page_shift);
+ WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift));
+ }
+
+ /* XSK frame size doesn't match the UMR page size, either because the
+ * frame size is not a power of two, or it's smaller than the minimal
+ * page size supported by the firmware.
+ * It's possible to receive packets bigger than MTU in certain setups.
+ * To avoid writing over the XSK frame boundary, the top region of each
+ * stride is mapped to a garbage page, resulting in two mappings of
+ * different sizes per frame.
+ */
+ if (oversized) {
+ /* An optimization for frame sizes equal to 3 * power_of_two.
+ * 3 KSMs point to the frame, and one KSM points to the garbage
+ * page, which works faster than KLM.
+ */
+ if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3))
+ return MLX5E_MPWRQ_UMR_MODE_TRIPLE;
+
+ return MLX5E_MPWRQ_UMR_MODE_OVERSIZED;
+ }
+
+ /* XSK frames can start at arbitrary unaligned locations, but they all
+ * have the same size which is a power of two. It allows to optimize to
+ * one KSM per frame.
+ */
+ if (unaligned)
+ return MLX5E_MPWRQ_UMR_MODE_UNALIGNED;
+
+ /* XSK: frames are naturally aligned, MTT can be used.
+ * Non-XSK: Allocations happen in units of CPU pages, therefore, the
+ * mappings are naturally aligned.
+ */
+ return MLX5E_MPWRQ_UMR_MODE_ALIGNED;
+}
+
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode)
+{
+ switch (mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return sizeof(struct mlx5_mtt);
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return sizeof(struct mlx5_ksm);
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return sizeof(struct mlx5_klm) * 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return sizeof(struct mlx5_ksm) * 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode);
+ return 0;
+}
+
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u8 max_pages_per_wqe, max_log_mpwqe_size;
+ u16 max_wqe_size;
+
+ /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
+ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB;
+ max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe),
+ MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size;
+ max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift;
+
+ WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU);
+
+ return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ);
+}
+
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
+ u8 pages_per_wqe;
+
+ pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;
+
+ /* Two MTTs are needed to form an octword. The number of MTTs is encoded
+ * in octwords in a UMR WQE, so we need at least two to avoid mapping
+ * garbage addresses.
+ */
+ if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ pages_per_wqe = 2;
+
+ /* Sanity check for further calculations to succeed. */
+ BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
+ if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
+ return MLX5_MPWRQ_MAX_PAGES_PER_WQE;
+
+ return pages_per_wqe;
+}
+
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u16 umr_wqe_sz;
+
+ umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) +
+ ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
+
+ WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK);
+
+ return umr_wqe_sz;
+}
+
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode),
+ MLX5_SEND_WQE_BB);
+}
+
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+
+ /* Add another page as a buffer between WQEs. This page will absorb
+ * write overflow by the hardware, when receiving packets larger than
+ * MTU. These oversize packets are dropped by the driver at a later
+ * stage.
+ */
+ return ALIGN(pages_per_wqe + 1,
+ MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode));
+}
+
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ /* Same limits apply to KSMs and KLMs. */
+ u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS,
+ 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5E_MAX_RQ_NUM_MTTS;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return klm_limit;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ /* Each entry is two KLMs. */
+ return klm_limit / 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ /* Each entry is four KSMs. */
+ return klm_limit / 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode);
+ u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode);
+
+ return ilog2(max_entries / mtts_per_wqe);
+}
+
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) +
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ MLX5E_ORDER2_MAX_PACKET_MTU;
+}
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u16 headroom;
+
+ if (xsk)
+ return xsk->headroom;
+
+ headroom = NET_IP_ALIGN;
+ if (params->xdp_prog)
+ headroom += XDP_PACKET_HEADROOM;
+ else
+ headroom += MLX5_RX_HEADROOM;
+
+ return headroom;
+}
+
+static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+
+ return xsk->headroom + hw_mtu;
+}
+
+static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
+{
+ /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
+ u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+
+ return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
+}
+
+static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ bool mpwqe)
+{
+ /* XSK frames are mapped as individual pages, because frames may come in
+ * an arbitrary order from random locations in the UMEM.
+ */
+ if (xsk)
+ return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
+
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (params->xdp_prog)
+ return PAGE_SIZE;
+
+ return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
+}
+
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ order_base_2(linear_stride_sz);
+}
+
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
+ return false;
+
+ /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
+ * must fit into a CPU page.
+ */
+ if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
+ return false;
+
+ /* XSK frames must be big enough to hold the packet data. */
+ if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size)
+ return false;
+
+ return true;
+}
+
+static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ u8 log_stride_sz, u8 log_num_strides,
+ u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ if (log_stride_sz + log_num_strides !=
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode))
+ return false;
+
+ if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE ||
+ log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX)
+ return false;
+
+ if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX)
+ return false;
+
+ if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+ return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE;
+
+ return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+}
+
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 log_num_strides;
+ u8 log_stride_sz;
+ u8 log_wqe_sz;
+
+ if (!mlx5e_rx_is_linear_skb(mdev, params, xsk))
+ return false;
+
+ log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
+
+ if (log_wqe_sz < log_stride_sz)
+ return false;
+
+ log_num_strides = log_wqe_sz - log_stride_sz;
+
+ return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz,
+ log_num_strides, page_shift,
+ umr_mode);
+}
+
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 log_pkts_per_wqe, page_shift, max_log_rq_size;
+
+ log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk);
+ page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode);
+
+ /* Numbers are unsigned, don't subtract to avoid underflow. */
+ if (params->log_rq_mtu_frames <
+ log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
+ return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+
+ /* Ethtool's rx_max_pending is calculated for regular RQ, that uses
+ * pages of PAGE_SIZE. Max length of an XSK RQ might differ if it uses a
+ * frame size not equal to PAGE_SIZE.
+ * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on
+ * unexpected failure.
+ */
+ if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size))
+ return max_log_rq_size;
+
+ return params->log_rq_mtu_frames - log_pkts_per_wqe;
+}
+
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE));
+}
+
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE);
+}
+
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
+ PAGE_SIZE;
+
+ return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu));
+}
+
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+
+ return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+}
+
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+}
+
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz)
+{
+#define UMR_WQE_BULK (2)
+ return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1);
+}
+
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+ return linear_headroom;
+
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return linear_headroom;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ return linear_headroom;
+
+ return 0;
+}
+
+u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
+ u16 stop_room;
+
+ stop_room = mlx5e_ktls_get_stop_room(mdev, params);
+ stop_room += mlx5e_stop_room_for_max_wqe(mdev);
+ if (is_mpwqe)
+ /* A MPWQE can take up to the maximum cacheline-aligned WQE +
+ * all the normal stop room can be taken if a new packet breaks
+ * the active MPWQE session and allocates its WQEs right away.
+ */
+ stop_room += mlx5e_stop_room_for_mpwqe(mdev);
+
+ return stop_room;
+}
+
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ size_t sq_size = 1 << params->log_sq_size;
+ u16 stop_room;
+
+ stop_room = mlx5e_calc_sq_stop_room(mdev, params);
+ if (stop_room >= sq_size) {
+ mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n",
+ stop_room, sq_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+{
+ struct dim_cq_moder moder = {};
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+ struct dim_cq_moder moder = {};
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+ return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->tx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+ } else {
+ params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+ }
+}
+
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->rx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+ } else {
+ params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
+ }
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ mlx5e_reset_tx_moderation(params, cq_period_mode);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ params->tx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ mlx5e_reset_rx_moderation(params, cq_period_mode);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ params->rx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
+{
+ u32 link_speed = 0;
+ u32 pci_bw = 0;
+
+ mlx5e_port_max_linkspeed(mdev, &link_speed);
+ pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
+ mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
+ link_speed, pci_bw);
+
+#define MLX5E_SLOW_PCI_RATIO (2)
+
+ return link_speed && pci_bw &&
+ link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
+}
+
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL);
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ u16 max_mtu_pkts;
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return -EINVAL;
+
+ /* Current RQ length is too big for the given frame size, the
+ * needed number of WQEs exceeds the maximum.
+ */
+ max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned));
+ if (params->log_rq_mtu_frames > max_mtu_pkts) {
+ mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
+ 1 << params->log_rq_mtu_frames, xsk->chunk_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+ BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
+ BIT(params->log_rq_mtu_frames),
+ BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+ MLX5_WQ_TYPE_CYCLIC;
+}
+
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ /* Prefer Striding RQ, unless any of the following holds:
+ * - Striding RQ configuration is not possible/supported.
+ * - CQE compression is ON, and stride_index mini_cqe layout is not supported.
+ * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ *
+ * No XSK params: checking the availability of striding RQ in general.
+ */
+ if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ||
+ MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) &&
+ !mlx5e_mpwrq_validate_regular(mdev, params) &&
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+ !mlx5e_rx_is_linear_skb(mdev, params, NULL)))
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+}
+
+/* Build queue parameters */
+
+void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c)
+{
+ *ccp = (struct mlx5e_create_cq_param) {
+ .napi = &c->napi,
+ .ch_stats = c->stats,
+ .node = cpu_to_node(c->cpu),
+ .ix = c->ix,
+ };
+}
+
+static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
+{
+ if (xdp)
+ /* XDP requires all fragments to be of the same size. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size;
+
+ /* Optimization for small packets: the last fragment is bigger than the others. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
+}
+
+#define DEFAULT_FRAG_SIZE (2048)
+
+static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_frags_info *info)
+{
+ u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ int frag_size_max = DEFAULT_FRAG_SIZE;
+ int first_frag_size_max;
+ u32 buf_size = 0;
+ u16 headroom;
+ int max_mtu;
+ int i;
+
+ if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
+ int frag_stride;
+
+ frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false);
+
+ info->arr[0].frag_size = byte_count;
+ info->arr[0].frag_stride = frag_stride;
+ info->num_frags = 1;
+
+ /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The
+ * first WQE in the page is responsible for allocation of this
+ * page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are
+ * still not completed, the allocation must stop before k*N.
+ */
+ info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1;
+
+ goto out;
+ }
+
+ headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu || params->xdp_prog) {
+ frag_size_max = PAGE_SIZE;
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu) {
+ mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n",
+ params->sw_mtu, max_mtu);
+ return -EINVAL;
+ }
+ }
+
+ i = 0;
+ while (buf_size < byte_count) {
+ int frag_size = byte_count - buf_size;
+
+ if (i == 0)
+ frag_size = min(frag_size, first_frag_size_max);
+ else if (i < MLX5E_MAX_RX_FRAGS - 1)
+ frag_size = min(frag_size, frag_size_max);
+
+ info->arr[i].frag_size = frag_size;
+ buf_size += frag_size;
+
+ if (params->xdp_prog) {
+ /* XDP multi buffer expects fragments of the same size. */
+ info->arr[i].frag_stride = frag_size_max;
+ } else {
+ if (i == 0) {
+ /* Ensure that headroom and tailroom are included. */
+ frag_size += headroom;
+ frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
+ info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+ }
+
+ i++;
+ }
+ info->num_frags = i;
+
+ /* The last fragment of WQE with index 2*N may share the page with the
+ * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1
+ * is not completed yet, WQE 2*N must not be allocated, as it's
+ * responsible for allocating a new page.
+ */
+ if (frag_size_max == PAGE_SIZE) {
+ /* No WQE can start in the middle of a page. */
+ info->wqe_index_mask = 0;
+ } else {
+ /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments,
+ * because there would be more than MLX5E_MAX_RX_FRAGS of them.
+ */
+ WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE);
+
+ /* Odd number of fragments allows to pack the last fragment of
+ * the previous WQE and the first fragment of the next WQE into
+ * the same page.
+ * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS
+ * is 4, the last fragment can be bigger than the rest only if
+ * it's the fourth one, so WQEs consisting of 3 fragments will
+ * always share a page.
+ * When a page is shared, WQE bulk size is 2, otherwise just 1.
+ */
+ info->wqe_index_mask = info->num_frags % 2;
+ }
+
+out:
+ /* Bulking optimization to skip allocation until at least 8 WQEs can be
+ * allocated in a row. At the same time, never start allocation when
+ * the page is still used by older WQEs.
+ */
+ info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+
+ info->log_num_frags = order_base_2(info->num_frags);
+
+ return 0;
+}
+
+static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
+{
+ int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
+
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ sz += sizeof(struct mlx5e_rx_wqe_ll);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ sz += sizeof(struct mlx5e_rx_wqe_cyc);
+ }
+
+ return order_base_2(sz);
+}
+
+static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
+}
+
+static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+
+ /* +1 is for the case that the pkt_per_rsrv dont consume the reservation
+ * so we get a filler cqe for the rest of the reservation.
+ */
+ return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1));
+}
+
+static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param)
+{
+ bool hw_stridx = false;
+ void *cqc = param->cqc;
+ u8 log_cq_size;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
+ else
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ log_cq_size = params->log_rq_mtu_frames;
+ }
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
+ MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+ }
+
+ mlx5e_build_common_cq_param(mdev, param);
+ param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
+}
+
+static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
+ bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+ MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
+ return ro && lro_en ?
+ MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+}
+
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int ndsegs = 1;
+ int err;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
+ u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+ log_wqe_num_of_strides,
+ page_shift, umr_mode)) {
+ mlx5_core_err(mdev,
+ "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n",
+ log_wqe_stride_size, log_wqe_num_of_strides,
+ umr_mode);
+ return -EINVAL;
+ }
+
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ MLX5_SET(wq, wq, shampo_enable, true);
+ MLX5_SET(wq, wq, log_reservation_size,
+ mlx5e_shampo_get_log_rsrv_size(mdev, params));
+ MLX5_SET(wq, wq,
+ log_max_num_of_packets_per_reservation,
+ mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ MLX5_SET(wq, wq, log_headers_entry_size,
+ mlx5e_shampo_get_log_hd_entry_size(mdev, params));
+ MLX5_SET(rqc, rqc, reservation_timeout,
+ params->packet_merge.timeout);
+ MLX5_SET(rqc, rqc, shampo_match_criteria_type,
+ params->packet_merge.shampo.match_criteria_type);
+ MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
+ params->packet_merge.shampo.alignment_granularity);
+ }
+ break;
+ }
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
+ err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ if (err)
+ return err;
+ ndsegs = param->frags_info.num_frags;
+ }
+
+ MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
+ MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params));
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+ MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
+ MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp);
+
+ return 0;
+}
+
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+ u16 q_counter,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
+ MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+}
+
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
+
+ mlx5e_build_common_cq_param(mdev, param);
+ param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
+}
+
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+}
+
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ bool allow_swp;
+
+ allow_swp = mlx5_geneve_tx_allowed(mdev) ||
+ (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO);
+ mlx5e_build_sq_param_common(mdev, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ MLX5_SET(sqc, sqc, allow_swp, allow_swp);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
+ param->stop_room = mlx5e_calc_sq_stop_room(mdev, params);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
+
+ mlx5e_build_common_cq_param(mdev, param);
+
+ param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+/* This function calculates the maximum number of headers entries that are needed
+ * per WQE, the formula is based on the size of the reservations and the
+ * restriction we have about max packets for reservation that is equal to max
+ * headers per reservation.
+ */
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL));
+ int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL);
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+ u32 hd_per_wqe;
+
+ /* Assumption: hd_per_wqe % 8 == 0. */
+ hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv;
+ mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n",
+ __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv);
+ return hd_per_wqe;
+}
+
+/* This function calculates the maximum number of headers entries that are needed
+ * for the WQ, this value is uesed to allocate the header buffer in HW, thus
+ * must be a pow of 2.
+ */
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 hd_per_wqe, hd_per_wq;
+
+ hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size);
+ return hd_per_wq;
+}
+
+static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 wqebbs;
+
+ max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
+ max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
+ rest = max_hd_per_wqe % max_klm_per_umr;
+ wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
+ if (rest)
+ wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
+ wqebbs *= wq_size;
+ return wqebbs;
+}
+
+static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 umr_wqebbs;
+
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+
+ return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+}
+
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp)
+{
+ u32 wqebbs, total_pages, useful_space;
+
+ /* MLX5_WQ_TYPE_CYCLIC */
+ if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ /* UMR WQEs for the regular RQ. */
+ wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);
+
+ /* If XDP program is attached, XSK may be turned on at any time without
+ * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of
+ * both regular RQ and XSK RQ.
+ *
+ * XSK uses different values of page_shift, and the total number of UMR
+ * WQEBBs depends on it. This dependency is complex and not monotonic,
+ * especially taking into consideration that some of the parameters come
+ * from capabilities. Hence, we have to try all valid values of XSK
+ * frame size (and page_shift) to find the maximum.
+ */
+ if (params->xdp_prog) {
+ u32 max_xsk_wqebbs = 0;
+ u8 frame_shift;
+
+ for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
+ frame_shift <= PAGE_SHIFT; frame_shift++) {
+ /* The headroom doesn't affect the calculation. */
+ struct mlx5e_xsk_param xsk = {
+ .chunk_size = 1 << frame_shift,
+ .unaligned = false,
+ };
+
+ /* XSK aligned mode. */
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a power of two. */
+ xsk.unaligned = true;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is not equal to stride size. */
+ xsk.chunk_size -= 1;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a triple power of two. */
+ xsk.chunk_size = (1 << frame_shift) / 4 * 3;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+ }
+
+ wqebbs += max_xsk_wqebbs;
+ }
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);
+
+ /* UMR WQEs don't cross the page boundary, they are padded with NOPs.
+ * This padding is always smaller than the max WQE size. That gives us
+ * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes
+ * per page. The number of pages is estimated as the total size of WQEs
+ * divided by the useful space in page, rounding up. If some WQEs don't
+ * fully fit into the useful space, they can occupy part of the padding,
+ * which proves this estimation to be correct (reserve enough space).
+ */
+ useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB;
+ total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space);
+ wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB);
+
+ return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
+}
+
+static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
+{
+ if (mlx5e_is_ktls_rx(mdev))
+ return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+
+ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+}
+
+static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
+ mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
+}
+
+static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+ param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
+ param->is_tls = mlx5e_is_ktls_rx(mdev);
+ if (param->is_tls)
+ param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
+}
+
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(mdev, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
+ param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam)
+{
+ u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
+ int err;
+
+ err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq);
+ if (err)
+ return err;
+
+ icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq);
+ async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
+
+ mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
+ mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq);
+ mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
+ mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
new file mode 100644
index 000000000..034debd14
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_PARAMS_H__
+#define __MLX5_EN_PARAMS_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param {
+ u16 headroom;
+ u16 chunk_size;
+ bool unaligned;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
+};
+
+struct mlx5e_rq_param {
+ struct mlx5e_cq_param cqp;
+ u32 rqc[MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+ struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+ struct mlx5e_cq_param cqp;
+ u32 sqc[MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+ bool is_mpw;
+ bool is_tls;
+ bool is_xdp_mb;
+ u16 stop_room;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param txq_sq;
+ struct mlx5e_sq_param xdp_sq;
+ struct mlx5e_sq_param icosq;
+ struct mlx5e_sq_param async_icosq;
+};
+
+struct mlx5e_create_sq_param {
+ struct mlx5_wq_ctrl *wq_ctrl;
+ u32 cqn;
+ u32 ts_cqe_to_dest_cqn;
+ u32 tisn;
+ u8 tis_lst_sz;
+ u8 min_inline_mode;
+};
+
+/* Striding RQ dynamic parameters */
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode);
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+
+/* Parameter calculations */
+
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+
+/* Build queue parameters */
+
+void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c);
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+ u16 q_counter,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_sq_param *param);
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam);
+
+u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
+#endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
new file mode 100644
index 000000000..89510cac4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "port.h"
+
+/* speed in units of 1Mb */
+static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
+ [MLX5E_1000BASE_CX_SGMII] = 1000,
+ [MLX5E_1000BASE_KX] = 1000,
+ [MLX5E_10GBASE_CX4] = 10000,
+ [MLX5E_10GBASE_KX4] = 10000,
+ [MLX5E_10GBASE_KR] = 10000,
+ [MLX5E_20GBASE_KR2] = 20000,
+ [MLX5E_40GBASE_CR4] = 40000,
+ [MLX5E_40GBASE_KR4] = 40000,
+ [MLX5E_56GBASE_R4] = 56000,
+ [MLX5E_10GBASE_CR] = 10000,
+ [MLX5E_10GBASE_SR] = 10000,
+ [MLX5E_10GBASE_ER] = 10000,
+ [MLX5E_40GBASE_SR4] = 40000,
+ [MLX5E_40GBASE_LR4] = 40000,
+ [MLX5E_50GBASE_SR2] = 50000,
+ [MLX5E_100GBASE_CR4] = 100000,
+ [MLX5E_100GBASE_SR4] = 100000,
+ [MLX5E_100GBASE_KR4] = 100000,
+ [MLX5E_100GBASE_LR4] = 100000,
+ [MLX5E_100BASE_TX] = 100,
+ [MLX5E_1000BASE_T] = 1000,
+ [MLX5E_10GBASE_T] = 10000,
+ [MLX5E_25GBASE_CR] = 25000,
+ [MLX5E_25GBASE_KR] = 25000,
+ [MLX5E_25GBASE_SR] = 25000,
+ [MLX5E_50GBASE_CR2] = 50000,
+ [MLX5E_50GBASE_KR2] = 50000,
+};
+
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+ [MLX5E_SGMII_100M] = 100,
+ [MLX5E_1000BASE_X_SGMII] = 1000,
+ [MLX5E_5GBASE_R] = 5000,
+ [MLX5E_10GBASE_XFI_XAUI_1] = 10000,
+ [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000,
+ [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000,
+ [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
+ [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000,
+ [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
+ [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
+ [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
+ [MLX5E_400GAUI_8] = 400000,
+ [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
+ [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
+ [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
+};
+
+bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_port_eth_proto eproto;
+ int err;
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet))
+ return true;
+
+ err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto);
+ if (err)
+ return false;
+
+ return !!eproto.cap;
+}
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+ const u32 **arr, u32 *size,
+ bool force_legacy)
+{
+ bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev);
+
+ *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+ ARRAY_SIZE(mlx5e_link_speed);
+ *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5e_port_eth_proto *eproto)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ if (!eproto)
+ return -EINVAL;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+ if (err)
+ return err;
+
+ eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_capability);
+ eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+ eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+ return 0;
+}
+
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+ *an_status = 0;
+ *an_disable_cap = 0;
+ *an_disable_admin = 0;
+
+ if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1))
+ return;
+
+ *an_status = MLX5_GET(ptys_reg, out, an_status);
+ *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+ *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, bool ext)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+ u8 an_disable_admin;
+ u8 an_disable_cap;
+ u8 an_status;
+
+ mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap,
+ &an_disable_admin);
+ if (!an_disable_cap && an_disable)
+ return -EPERM;
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(ptys_reg, in, local_port, 1);
+ MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+ MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN);
+ if (ext)
+ MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin);
+ else
+ MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+ bool force_legacy)
+{
+ unsigned long temp = eth_proto_oper;
+ const u32 *table;
+ u32 speed = 0;
+ u32 max_size;
+ int i;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ i = find_first_bit(&temp, max_size);
+ if (i < max_size)
+ speed = table[i];
+ return speed;
+}
+
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ struct mlx5e_port_eth_proto eproto;
+ bool force_legacy = false;
+ bool ext;
+ int err;
+
+ ext = mlx5e_ptys_ext_supported(mdev);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
+ if (err)
+ goto out;
+ if (ext && !eproto.admin) {
+ force_legacy = true;
+ err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto);
+ if (err)
+ goto out;
+ }
+ *speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy);
+ if (!(*speed))
+ err = -EINVAL;
+
+out:
+ return err;
+}
+
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ struct mlx5e_port_eth_proto eproto;
+ u32 max_speed = 0;
+ const u32 *table;
+ u32 max_size;
+ bool ext;
+ int err;
+ int i;
+
+ ext = mlx5e_ptys_ext_supported(mdev);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
+ if (err)
+ return err;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
+ for (i = 0; i < max_size; ++i)
+ if (eproto.cap & MLX5E_PROT_MASK(i))
+ max_speed = max(max_speed, table[i]);
+
+ *speed = max_speed;
+ return 0;
+}
+
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+ bool force_legacy)
+{
+ u32 link_modes = 0;
+ const u32 *table;
+ u32 max_size;
+ int i;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ for (i = 0; i < max_size; ++i) {
+ if (table[i] == speed)
+ link_modes |= MLX5E_PROT_MASK(i);
+ }
+ return link_modes;
+}
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *in;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0);
+
+ kfree(in);
+ return err;
+}
+
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *out;
+ int err;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1);
+
+ kfree(out);
+ return err;
+}
+
+/* buffer[i]: buffer that priority i mapped to */
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
+ prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff);
+ for (prio = 0; prio < 8; prio++) {
+ buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF;
+ mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]);
+ }
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* First query the pptb register */
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
+ memcpy(in, out, sz);
+ MLX5_SET(pptb_reg, in, local_port, 1);
+
+ /* Update the pm and prio_x_buff */
+ MLX5_SET(pptb_reg, in, pm, 0xFF);
+
+ prio_x_buff = 0;
+ for (prio = 0; prio < 8; prio++)
+ prio_x_buff |= (buffer[prio] << (4 * prio));
+ MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff);
+
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+enum mlx5e_fec_supported_link_mode {
+ MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_25G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_50G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_56G,
+ MLX5E_FEC_SUPPORTED_LINK_MODES_100G,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X,
+ MLX5E_MAX_FEC_SUPPORTED_LINK_MODE,
+};
+
+#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X
+
+#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \
+ do { \
+ u16 *_policy = &(policy); \
+ u32 *_buf = buf; \
+ \
+ if (write) \
+ MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \
+ else \
+ *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \
+ } while (0)
+
+/* get/set FEC admin field for a given speed */
+static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write,
+ enum mlx5e_fec_supported_link_mode link_mode)
+{
+ switch (link_mode) {
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_25G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_50G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_56G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_100G:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \
+ MLX5_GET(pplm_reg, buf, fec_override_cap_##link)
+
+/* returns FEC capabilities for a given speed */
+static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap,
+ enum mlx5e_fec_supported_link_mode link_mode)
+{
+ switch (link_mode) {
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_25G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_50G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_56G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODES_100G:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy)
+{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm))
+ return false;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return false;
+
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ u16 fec_caps;
+
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
+
+ mlx5e_get_fec_cap_field(out, &fec_caps, i);
+ if (fec_caps & fec_policy)
+ return true;
+ }
+ return false;
+}
+
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u16 *fec_configured_mode)
+{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active);
+
+ if (!fec_configured_mode)
+ goto out;
+
+ *fec_configured_mode = 0;
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
+
+ mlx5e_fec_admin_field(out, fec_configured_mode, 0, i);
+ if (*fec_configured_mode != 0)
+ goto out;
+ }
+out:
+ return 0;
+}
+
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
+{
+ bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u16 fec_policy_auto = 0;
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane)
+ return -EOPNOTSUPP;
+
+ if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ MLX5_SET(pplm_reg, out, local_port, 1);
+
+ for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) {
+ u16 conf_fec = fec_policy;
+ u16 fec_caps = 0;
+
+ if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane)
+ break;
+
+ /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514
+ * to link modes up to 25G per lane and to
+ * MLX5E_FEC_RS_544_514 in the new link modes based on
+ * 50 G per lane
+ */
+ if (conf_fec == (1 << MLX5E_FEC_RS_528_514) &&
+ i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
+ conf_fec = (1 << MLX5E_FEC_RS_544_514);
+
+ mlx5e_get_fec_cap_field(out, &fec_caps, i);
+
+ /* policy supported for link speed */
+ if (fec_caps & conf_fec)
+ mlx5e_fec_admin_field(out, &conf_fec, 1, i);
+ else
+ /* set FEC to auto*/
+ mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i);
+ }
+
+ return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
new file mode 100644
index 000000000..7a7defe60
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_EN_PORT_H
+#define __MLX5E_EN_PORT_H
+
+#include <linux/mlx5/driver.h>
+#include "en.h"
+
+struct mlx5e_port_eth_proto {
+ u32 cap;
+ u32 admin;
+ u32 oper;
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5e_port_eth_proto *eproto);
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin);
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, bool ext);
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+ bool force_legacy);
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+ bool force_legacy);
+bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev);
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+
+bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy);
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u16 *fec_configured_mode);
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy);
+
+enum {
+ MLX5E_FEC_NOFEC,
+ MLX5E_FEC_FIRECODE,
+ MLX5E_FEC_RS_528_514,
+ MLX5E_FEC_RS_544_514 = 7,
+ MLX5E_FEC_LLRS_272_257_1 = 9,
+};
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
new file mode 100644
index 000000000..c9d5d8d93
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "port_buffer.h"
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ u32 total_used = 0;
+ void *buffer;
+ void *out;
+ int err;
+ int i;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, out);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
+ port_buffer->buffer[i].lossy =
+ MLX5_GET(bufferx_reg, buffer, lossy);
+ port_buffer->buffer[i].epsb =
+ MLX5_GET(bufferx_reg, buffer, epsb);
+ port_buffer->buffer[i].size =
+ MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz;
+ port_buffer->buffer[i].xon =
+ MLX5_GET(bufferx_reg, buffer, xon_threshold) * port_buff_cell_sz;
+ port_buffer->buffer[i].xoff =
+ MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz;
+ total_used += port_buffer->buffer[i].size;
+
+ mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i,
+ port_buffer->buffer[i].size,
+ port_buffer->buffer[i].xon,
+ port_buffer->buffer[i].xoff,
+ port_buffer->buffer[i].epsb,
+ port_buffer->buffer[i].lossy);
+ }
+
+ port_buffer->port_buffer_size =
+ MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz;
+ port_buffer->spare_buffer_size =
+ port_buffer->port_buffer_size - total_used;
+
+ mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
+ port_buffer->port_buffer_size,
+ port_buffer->spare_buffer_size);
+out:
+ kfree(out);
+ return err;
+}
+
+static int port_set_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *in;
+ int err;
+ int i;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, in);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
+ u64 size = port_buffer->buffer[i].size;
+ u64 xoff = port_buffer->buffer[i].xoff;
+ u64 xon = port_buffer->buffer[i].xon;
+
+ do_div(size, port_buff_cell_sz);
+ do_div(xoff, port_buff_cell_sz);
+ do_div(xon, port_buff_cell_sz);
+ MLX5_SET(bufferx_reg, buffer, size, size);
+ MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy);
+ MLX5_SET(bufferx_reg, buffer, xoff_threshold, xoff);
+ MLX5_SET(bufferx_reg, buffer, xon_threshold, xon);
+ }
+
+ err = mlx5e_port_set_pbmc(mdev, in);
+out:
+ kfree(in);
+ return err;
+}
+
+/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B])
+ * minimum speed value is 40Gbps
+ */
+static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
+{
+ u32 speed;
+ u32 xoff;
+ int err;
+
+ err = mlx5e_port_linkspeed(priv->mdev, &speed);
+ if (err)
+ speed = SPEED_40000;
+ speed = max_t(u32, speed, SPEED_40000);
+
+ xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
+
+ mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff);
+ return xoff;
+}
+
+static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
+ u32 xoff, unsigned int max_mtu, u16 port_buff_cell_sz)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ if (port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].xoff = 0;
+ port_buffer->buffer[i].xon = 0;
+ continue;
+ }
+
+ if (port_buffer->buffer[i].size <
+ (xoff + max_mtu + port_buff_cell_sz)) {
+ pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n",
+ i, port_buffer->buffer[i].size);
+ return -ENOMEM;
+ }
+
+ port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
+ port_buffer->buffer[i].xon =
+ port_buffer->buffer[i].xoff - max_mtu;
+ }
+
+ return 0;
+}
+
+/**
+ * update_buffer_lossy - Update buffer configuration based on pfc
+ * @max_mtu: netdev's max_mtu
+ * @pfc_en: <input> current pfc configuration
+ * @buffer: <input> current prio to buffer mapping
+ * @xoff: <input> xoff value
+ * @port_buff_cell_sz: <input> port buffer cell_size
+ * @port_buffer: <output> port receive buffer configuration
+ * @change: <output>
+ *
+ * Update buffer configuration based on pfc configuration and
+ * priority to buffer mapping.
+ * Buffer's lossy bit is changed to:
+ * lossless if there is at least one PFC enabled priority
+ * mapped to this buffer lossy if all priorities mapped to
+ * this buffer are PFC disabled
+ *
+ * @return: 0 if no error,
+ * sets change to true if buffer configuration was modified.
+ */
+static int update_buffer_lossy(unsigned int max_mtu,
+ u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz,
+ struct mlx5e_port_buffer *port_buffer,
+ bool *change)
+{
+ bool changed = false;
+ u8 lossy_count;
+ u8 prio_count;
+ u8 lossy;
+ int prio;
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ prio_count = 0;
+ lossy_count = 0;
+
+ for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) {
+ if (buffer[prio] != i)
+ continue;
+
+ prio_count++;
+ lossy_count += !(pfc_en & (1 << prio));
+ }
+
+ if (lossy_count == prio_count)
+ lossy = 1;
+ else /* lossy_count < prio_count */
+ lossy = 0;
+
+ if (lossy != port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].lossy = lossy;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+
+ *change = true;
+ }
+
+ return 0;
+}
+
+static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en)
+{
+ u32 g_rx_pause, g_tx_pause;
+ int err;
+
+ err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause);
+ if (err)
+ return err;
+
+ /* If global pause enabled, set all active buffers to lossless.
+ * Otherwise, check PFC setting.
+ */
+ if (g_rx_pause || g_tx_pause)
+ *pfc_en = 0xff;
+ else
+ err = mlx5_query_port_pfc(mdev, pfc_en, NULL);
+
+ return err;
+}
+
+#define MINIMUM_MAX_MTU 9216
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer)
+{
+ u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
+ struct mlx5e_port_buffer port_buffer;
+ u32 xoff = calculate_xoff(priv, mtu);
+ bool update_prio2buffer = false;
+ u8 buffer[MLX5E_MAX_PRIORITY];
+ bool update_buffer = false;
+ unsigned int max_mtu;
+ u32 total_used = 0;
+ u8 curr_pfc_en;
+ int err;
+ int i;
+
+ mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+ max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU);
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PFC) {
+ err = mlx5e_port_query_priority2buffer(priv->mdev, buffer);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz,
+ &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
+ update_prio2buffer = true;
+ err = fill_pfc_en(priv->mdev, &curr_pfc_en);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
+ port_buff_cell_sz, &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_SIZE) {
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
+ if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
+ mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
+ __func__, i);
+ return -EINVAL;
+ }
+
+ port_buffer.buffer[i].size = buffer_size[i];
+ total_used += buffer_size[i];
+ }
+
+ mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);
+
+ if (total_used > port_buffer.port_buffer_size)
+ return -EINVAL;
+
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+ }
+
+ /* Need to update buffer configuration if xoff value is changed */
+ if (!update_buffer && xoff != priv->dcbx.xoff) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ if (err)
+ return err;
+ }
+ priv->dcbx.xoff = xoff;
+
+ /* Apply the settings */
+ if (update_buffer) {
+ err = port_set_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+ }
+
+ if (update_prio2buffer)
+ err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
new file mode 100644
index 000000000..80af7a5ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_PORT_BUFFER_H__
+#define __MLX5_EN_PORT_BUFFER_H__
+
+#include "en.h"
+#include "port.h"
+
+#define MLX5E_MAX_BUFFER 8
+#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
+
+#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
+ MLX5_CAP_PCAM_REG(mdev, pbmc) && \
+ MLX5_CAP_PCAM_REG(mdev, pptb))
+
+enum {
+ MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0),
+ MLX5E_PORT_BUFFER_PFC = BIT(1),
+ MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2),
+ MLX5E_PORT_BUFFER_SIZE = BIT(3),
+};
+
+struct mlx5e_bufferx_reg {
+ u8 lossy;
+ u8 epsb;
+ u32 size;
+ u32 xoff;
+ u32 xon;
+};
+
+struct mlx5e_port_buffer {
+ u32 port_buffer_size;
+ u32 spare_buffer_size;
+ struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER];
+};
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer);
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
new file mode 100644
index 000000000..72b4781f0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies
+
+#include "en/ptp.h"
+#include "en/txrx.h"
+#include "en/params.h"
+#include "en/fs_tt_redirect.h"
+
+struct mlx5e_ptp_fs {
+ struct mlx5_flow_handle *l2_rule;
+ struct mlx5_flow_handle *udp_v4_rule;
+ struct mlx5_flow_handle *udp_v6_rule;
+ bool valid;
+};
+
+struct mlx5e_ptp_params {
+ struct mlx5e_params params;
+ struct mlx5e_sq_param txq_sq_param;
+ struct mlx5e_rq_param rq_param;
+};
+
+struct mlx5e_skb_cb_hwtstamp {
+ ktime_t cqe_hwtstamp;
+ ktime_t port_hwtstamp;
+};
+
+void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb)
+{
+ memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
+}
+
+static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_skb_cb_hwtstamp) > sizeof(skb->cb));
+ return (struct mlx5e_skb_cb_hwtstamp *)skb->cb;
+}
+
+static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
+ struct mlx5e_ptp_cq_stats *cq_stats)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ ktime_t diff;
+
+ diff = abs(mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp -
+ mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp);
+
+ /* Maximal allowed diff is 1 / 128 second */
+ if (diff > (NSEC_PER_SEC >> 7)) {
+ cq_stats->abort++;
+ cq_stats->abort_abs_diff_ns += diff;
+ return;
+ }
+
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+}
+
+void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
+ ktime_t hwtstamp,
+ struct mlx5e_ptp_cq_stats *cq_stats)
+{
+ switch (hwtstamp_type) {
+ case (MLX5E_SKB_CB_CQE_HWTSTAMP):
+ mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp = hwtstamp;
+ break;
+ case (MLX5E_SKB_CB_PORT_HWTSTAMP):
+ mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp = hwtstamp;
+ break;
+ }
+
+ /* If both CQEs arrive, check and report the port tstamp, and clear skb cb as
+ * skb soon to be released.
+ */
+ if (!mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp ||
+ !mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp)
+ return;
+
+ mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats);
+ memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
+}
+
+#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
+
+static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
+}
+
+static bool mlx5e_ptp_ts_cqe_ooo(struct mlx5e_ptpsq *ptpsq, u16 skb_id)
+{
+ u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ u16 skb_pc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc);
+
+ if (PTP_WQE_CTR2IDX(skb_id - skb_cc) >= PTP_WQE_CTR2IDX(skb_pc - skb_cc))
+ return true;
+
+ return false;
+}
+
+static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
+ u16 skb_id, int budget)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ struct sk_buff *skb;
+
+ ptpsq->cq_stats->resync_event++;
+
+ while (skb_cc != skb_id) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+ ptpsq->cq_stats->resync_cqe++;
+ napi_consume_skb(skb, budget);
+ skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ }
+}
+
+static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
+ struct mlx5_cqe64 *cqe,
+ int budget)
+{
+ u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
+ u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct sk_buff *skb;
+ ktime_t hwtstamp;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ ptpsq->cq_stats->err_cqe++;
+ goto out;
+ }
+
+ if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id)) {
+ if (mlx5e_ptp_ts_cqe_ooo(ptpsq, skb_id)) {
+ /* already handled by a previous resync */
+ ptpsq->cq_stats->ooo_cqe_drop++;
+ return;
+ }
+ mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget);
+ }
+
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
+ mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
+ hwtstamp, ptpsq->cq_stats);
+ ptpsq->cq_stats->cqe++;
+
+out:
+ napi_consume_skb(skb, budget);
+}
+
+static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
+{
+ struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq);
+ struct mlx5_cqwq *cqwq = &cq->wq;
+ struct mlx5_cqe64 *cqe;
+ int work_done = 0;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(cqwq);
+ if (!cqe)
+ return false;
+
+ do {
+ mlx5_cqwq_pop(cqwq);
+
+ mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget);
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
+
+ mlx5_cqwq_update_db_record(cqwq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ mlx5e_txqsq_wake(&ptpsq->txqsq);
+
+ return work_done == budget;
+}
+
+static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi);
+ struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_rq *rq = &c->rq;
+ bool busy = false;
+ int work_done = 0;
+ int i;
+
+ rcu_read_lock();
+
+ ch_stats->poll++;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (i = 0; i < c->num_tc; i++) {
+ busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget);
+ busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget);
+ }
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) {
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ busy |= work_done == budget;
+ busy |= INDIRECT_CALL_2(rq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ rq);
+ }
+
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ goto out;
+
+ ch_stats->arm++;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (i = 0; i < c->num_tc; i++) {
+ mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq);
+ mlx5e_cq_arm(&c->ptpsq[i].ts_cq);
+ }
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
+
+ return work_done;
+}
+
+static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq, int tc,
+ struct mlx5e_ptpsq *ptpsq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+ int node;
+
+ sq->pdev = c->pdev;
+ sq->clock = &mdev->clock;
+ sq->mkey_be = c->mkey_be;
+ sq->netdev = c->netdev;
+ sq->priv = c->priv;
+ sq->mdev = mdev;
+ sq->ch_ix = MLX5E_PTP_CHANNEL_IX;
+ sq->txq_ix = txq_ix;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->stats = &c->priv->ptp_stats.sq[tc];
+ sq->ptpsq = ptpsq;
+ INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+ if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
+ sq->stop_room = param->stop_room;
+ sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
+
+ node = dev_to_node(mlx5_core_dma_dev(mdev));
+
+ param->wq.db_numa_node = node;
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_txqsq_db(sq, node);
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ mlx5_core_destroy_sq(mdev, sqn);
+}
+
+static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq);
+ struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev;
+
+ ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)),
+ GFP_KERNEL, numa);
+ if (!ptpsq->skb_fifo.fifo)
+ return -ENOMEM;
+
+ ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc;
+ ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc;
+ ptpsq->skb_fifo.mask = wq_sz - 1;
+ if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter))
+ ptpsq->ts_cqe_ctr_mask =
+ (1 << MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1;
+ return 0;
+}
+
+static void mlx5e_ptp_drain_skb_fifo(struct mlx5e_skb_fifo *skb_fifo)
+{
+ while (*skb_fifo->pc != *skb_fifo->cc) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(skb_fifo);
+
+ dev_kfree_skb_any(skb);
+ }
+}
+
+static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo)
+{
+ mlx5e_ptp_drain_skb_fifo(skb_fifo);
+ kvfree(skb_fifo->fifo);
+}
+
+static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
+ int txq_ix, struct mlx5e_ptp_params *cparams,
+ int tc, struct mlx5e_ptpsq *ptpsq)
+{
+ struct mlx5e_sq_param *sqp = &cparams->txq_sq_param;
+ struct mlx5e_txqsq *txqsq = &ptpsq->txqsq;
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+ err = mlx5e_ptp_alloc_txqsq(c, txq_ix, &cparams->params, sqp,
+ txqsq, tc, ptpsq);
+ if (err)
+ return err;
+
+ csp.tisn = tisn;
+ csp.tis_lst_sz = 1;
+ csp.cqn = txqsq->cq.mcq.cqn;
+ csp.wq_ctrl = &txqsq->wq_ctrl;
+ csp.min_inline_mode = txqsq->min_inline_mode;
+ csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
+
+ err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
+ if (err)
+ goto err_free_txqsq;
+
+ err = mlx5e_ptp_alloc_traffic_db(ptpsq,
+ dev_to_node(mlx5_core_dma_dev(c->mdev)));
+ if (err)
+ goto err_free_txqsq;
+
+ return 0;
+
+err_free_txqsq:
+ mlx5e_free_txqsq(txqsq);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq)
+{
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct mlx5_core_dev *mdev = sq->mdev;
+
+ mlx5e_ptp_free_traffic_db(&ptpsq->skb_fifo);
+ cancel_work_sync(&sq->recover_work);
+ mlx5e_ptp_destroy_sq(mdev, sq->sqn);
+ mlx5e_free_txqsq_descs(sq);
+ mlx5e_free_txqsq(sq);
+}
+
+static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_params *params = &cparams->params;
+ u8 num_tc = mlx5e_get_dcb_num_tc(params);
+ int ix_base;
+ int err;
+ int tc;
+
+ ix_base = num_tc * params->num_channels;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ int txq_ix = ix_base + tc;
+
+ err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
+ cparams, tc, &c->ptpsq[tc]);
+ if (err)
+ goto close_txqsq;
+ }
+
+ return 0;
+
+close_txqsq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
+}
+
+static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_params *params = &cparams->params;
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder ptp_moder = {};
+ struct mlx5e_cq_param *cq_param;
+ u8 num_tc;
+ int err;
+ int tc;
+
+ num_tc = mlx5e_get_dcb_num_tc(params);
+
+ ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev));
+ ccp.ch_stats = c->stats;
+ ccp.napi = &c->napi;
+ ccp.ix = MLX5E_PTP_CHANNEL_IX;
+
+ cq_param = &cparams->txq_sq_param.cqp;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq;
+
+ err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+ if (err)
+ goto out_err_txqsq_cq;
+ }
+
+ for (tc = 0; tc < num_tc; tc++) {
+ struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq;
+ struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc];
+
+ err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+ if (err)
+ goto out_err_ts_cq;
+
+ ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc];
+ }
+
+ return 0;
+
+out_err_ts_cq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
+ tc = num_tc;
+out_err_txqsq_cq:
+ for (--tc; tc >= 0; tc--)
+ mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
+
+ return err;
+}
+
+static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder ptp_moder = {};
+ struct mlx5e_cq_param *cq_param;
+ struct mlx5e_cq *cq = &c->rq.cq;
+
+ ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev));
+ ccp.ch_stats = c->stats;
+ ccp.napi = &c->napi;
+ ccp.ix = MLX5E_PTP_CHANNEL_IX;
+
+ cq_param = &cparams->rq_param.cqp;
+
+ return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+}
+
+static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
+}
+
+static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq;
+
+ mlx5e_build_sq_param_common(mdev, param);
+
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->stop_room = mlx5e_stop_room_for_max_wqe(mdev);
+ mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
+}
+
+static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ u16 q_counter,
+ struct mlx5e_ptp_params *ptp_params)
+{
+ struct mlx5e_rq_param *rq_params = &ptp_params->rq_param;
+ struct mlx5e_params *params = &ptp_params->params;
+
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ mlx5e_init_rq_type_params(mdev, params);
+ params->sw_mtu = netdev->max_mtu;
+ mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params);
+}
+
+static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams,
+ struct mlx5e_params *orig)
+{
+ struct mlx5e_params *params = &cparams->params;
+
+ params->tx_min_inline_mode = orig->tx_min_inline_mode;
+ params->num_channels = orig->num_channels;
+ params->hard_mtu = orig->hard_mtu;
+ params->sw_mtu = orig->sw_mtu;
+ params->mqprio = orig->mqprio;
+
+ /* SQ */
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ params->log_sq_size = orig->log_sq_size;
+ mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
+ }
+ /* RQ */
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ params->vlan_strip_disable = orig->vlan_strip_disable;
+ mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+ }
+}
+
+static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5e_priv *priv = c->priv;
+ int err;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = priv->netdev;
+ rq->priv = priv;
+ rq->clock = &mdev->clock;
+ rq->tstamp = &priv->tstamp;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &c->priv->ptp_stats.rq;
+ rq->ix = MLX5E_PTP_CHANNEL_IX;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ err = mlx5e_rq_set_handlers(rq, params, false);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
+}
+
+static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int node = dev_to_node(c->mdev->device);
+ int err;
+
+ err = mlx5e_init_ptp_rq(c, params, &c->rq);
+ if (err)
+ return err;
+
+ return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq);
+}
+
+static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
+ struct mlx5e_ptp_params *cparams)
+{
+ int err;
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ err = mlx5e_ptp_open_tx_cqs(c, cparams);
+ if (err)
+ return err;
+
+ err = mlx5e_ptp_open_txqsqs(c, cparams);
+ if (err)
+ goto close_tx_cqs;
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ err = mlx5e_ptp_open_rx_cq(c, cparams);
+ if (err)
+ goto close_txqsq;
+
+ err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param);
+ if (err)
+ goto close_rx_cq;
+ }
+ return 0;
+
+close_rx_cq:
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_close_cq(&c->rq.cq);
+close_txqsq:
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+ mlx5e_ptp_close_txqsqs(c);
+close_tx_cqs:
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+ mlx5e_ptp_close_tx_cqs(c);
+
+ return err;
+}
+
+static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c)
+{
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ mlx5e_close_rq(&c->rq);
+ mlx5e_close_cq(&c->rq.cq);
+ }
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ mlx5e_ptp_close_txqsqs(c);
+ mlx5e_ptp_close_tx_cqs(c);
+ }
+}
+
+static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
+{
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS))
+ __set_bit(MLX5E_PTP_STATE_TX, c->state);
+
+ if (params->ptp_rx)
+ __set_bit(MLX5E_PTP_STATE_RX, c->state);
+
+ return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
+}
+
+static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
+
+ if (!ptp_fs->valid)
+ return;
+
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
+
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
+ ptp_fs->valid = false;
+}
+
+static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
+{
+ u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
+ struct mlx5e_flow_steering *fs = priv->fs;
+ struct mlx5_flow_handle *rule;
+ struct mlx5e_ptp_fs *ptp_fs;
+ int err;
+
+ ptp_fs = mlx5e_fs_get_ptp(fs);
+ if (ptp_fs->valid)
+ return 0;
+
+ err = mlx5e_fs_tt_redirect_udp_create(fs);
+ if (err)
+ goto out_free;
+
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP,
+ tirn, PTP_EV_PORT);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_fs_udp;
+ }
+ ptp_fs->udp_v4_rule = rule;
+
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP,
+ tirn, PTP_EV_PORT);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_udp_v4_rule;
+ }
+ ptp_fs->udp_v6_rule = rule;
+
+ err = mlx5e_fs_tt_redirect_any_create(fs);
+ if (err)
+ goto out_destroy_udp_v6_rule;
+
+ rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto out_destroy_fs_any;
+ }
+ ptp_fs->l2_rule = rule;
+ ptp_fs->valid = true;
+
+ return 0;
+
+out_destroy_fs_any:
+ mlx5e_fs_tt_redirect_any_destroy(fs);
+out_destroy_udp_v6_rule:
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+out_destroy_udp_v4_rule:
+ mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+out_destroy_fs_udp:
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
+out_free:
+ return err;
+}
+
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ u8 lag_port, struct mlx5e_ptp **cp)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_ptp_params *cparams;
+ struct mlx5e_ptp *c;
+ int err;
+
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev)));
+ cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL);
+ if (!c || !cparams) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->pdev = mlx5_core_dma_dev(priv->mdev);
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ c->num_tc = mlx5e_get_dcb_num_tc(params);
+ c->stats = &priv->ptp_stats.ch;
+ c->lag_port = lag_port;
+
+ err = mlx5e_ptp_set_state(c, params);
+ if (err)
+ goto err_free;
+
+ netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll);
+
+ mlx5e_ptp_build_params(c, cparams, params);
+
+ err = mlx5e_ptp_open_queues(c, cparams);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ priv->rx_ptp_opened = true;
+
+ *cp = c;
+
+ kvfree(cparams);
+
+ return 0;
+
+err_napi_del:
+ netif_napi_del(&c->napi);
+err_free:
+ kvfree(cparams);
+ kvfree(c);
+ return err;
+}
+
+void mlx5e_ptp_close(struct mlx5e_ptp *c)
+{
+ mlx5e_ptp_close_queues(c);
+ netif_napi_del(&c->napi);
+
+ kvfree(c);
+}
+
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ napi_enable(&c->napi);
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
+ }
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ mlx5e_ptp_rx_set_fs(c->priv);
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_sched(&c->napi);
+ }
+}
+
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
+{
+ int tc;
+
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ mlx5e_deactivate_rq(&c->rq);
+
+ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq);
+ }
+
+ napi_disable(&c->napi);
+}
+
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
+{
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+ return -EINVAL;
+
+ *rqn = c->rq.rqn;
+ return 0;
+}
+
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
+{
+ struct mlx5e_ptp_fs *ptp_fs;
+
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
+ return 0;
+
+ ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL);
+ if (!ptp_fs)
+ return -ENOMEM;
+ mlx5e_fs_set_ptp(fs, ptp_fs);
+
+ return 0;
+}
+
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
+{
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
+
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
+ return;
+
+ mlx5e_ptp_rx_unset_fs(fs);
+ kfree(ptp_fs);
+}
+
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
+{
+ struct mlx5e_ptp *c = priv->channels.ptp;
+
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
+ return 0;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ if (set) {
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules");
+ return -EINVAL;
+ }
+ return mlx5e_ptp_rx_set_fs(priv);
+ }
+ /* set == false */
+ if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules");
+ return -EINVAL;
+ }
+ mlx5e_ptp_rx_unset_fs(priv->fs);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
new file mode 100644
index 000000000..cc7efde88
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_PTP_H__
+#define __MLX5_EN_PTP_H__
+
+#include "en.h"
+#include "en_stats.h"
+#include "en/txrx.h"
+#include <linux/ptp_classify.h>
+
+#define MLX5E_PTP_CHANNEL_IX 0
+
+struct mlx5e_ptpsq {
+ struct mlx5e_txqsq txqsq;
+ struct mlx5e_cq ts_cq;
+ u16 skb_fifo_cc;
+ u16 skb_fifo_pc;
+ struct mlx5e_skb_fifo skb_fifo;
+ struct mlx5e_ptp_cq_stats *cq_stats;
+ u16 ts_cqe_ctr_mask;
+};
+
+enum {
+ MLX5E_PTP_STATE_TX,
+ MLX5E_PTP_STATE_RX,
+ MLX5E_PTP_STATE_NUM_STATES,
+};
+
+struct mlx5e_ptp {
+ /* data path */
+ struct mlx5e_ptpsq ptpsq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq rq;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+ u8 num_tc;
+ u8 lag_port;
+
+ /* data path - accessed per napi poll */
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES);
+};
+
+static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
+{
+ struct flow_keys fk;
+
+ if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ return false;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+ return false;
+
+ if (fk.basic.n_proto == htons(ETH_P_1588))
+ return true;
+
+ if (fk.basic.n_proto != htons(ETH_P_IP) &&
+ fk.basic.n_proto != htons(ETH_P_IPV6))
+ return false;
+
+ return (fk.basic.ip_proto == IPPROTO_UDP &&
+ fk.ports.dst == htons(PTP_EV_PORT));
+}
+
+static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq)
+{
+ if (!sq->ptpsq)
+ return true;
+
+ return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo);
+}
+
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ u8 lag_port, struct mlx5e_ptp **cp);
+void mlx5e_ptp_close(struct mlx5e_ptp *c);
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set);
+
+enum {
+ MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0),
+ MLX5E_SKB_CB_PORT_HWTSTAMP = BIT(1),
+};
+
+void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
+ ktime_t hwtstamp,
+ struct mlx5e_ptp_cq_stats *cq_stats);
+
+void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb);
+#endif /* __MLX5_EN_PTP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
new file mode 100644
index 000000000..2842195ee
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+#include <net/sch_generic.h>
+
+#include <net/pkt_cls.h>
+#include "en.h"
+#include "params.h"
+#include "../qos.h"
+#include "en/htb.h"
+
+struct qos_sq_callback_params {
+ struct mlx5e_priv *priv;
+ struct mlx5e_channels *chs;
+};
+
+int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes)
+{
+ if (nbytes < BYTES_IN_MBIT) {
+ qos_warn(mdev, "Input rate (%llu Bytes/sec) below minimum supported (%u Bytes/sec)\n",
+ nbytes, BYTES_IN_MBIT);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static u32 mlx5e_qos_bytes2mbits(struct mlx5_core_dev *mdev, u64 nbytes)
+{
+ return div_u64(nbytes, BYTES_IN_MBIT);
+}
+
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+ return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev));
+}
+
+/* TX datapath API */
+
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
+{
+ /* These channel params are safe to access from the datapath, because:
+ * 1. This function is called only after checking selq->htb_maj_id != 0,
+ * and the number of queues can't change while HTB offload is active.
+ * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for
+ * mlx5e_select_queue to finish while holding priv->state_lock,
+ * preventing other code from changing the number of queues.
+ */
+ bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS);
+
+ return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid;
+}
+
+/* SQ lifecycle */
+
+static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_channel *c;
+ int ix;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ return mlx5e_state_dereference(priv, qos_sqs[qid]);
+}
+
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_create_cq_param ccp = {};
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_sq_param param_sq;
+ struct mlx5e_cq_param param_cq;
+ int txq_ix, ix, qid, err = 0;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+
+ params = &chs->params;
+
+ txq_ix = mlx5e_qid_from_qos(chs, node_qid);
+
+ WARN_ON(node_qid > priv->htb_max_qos_sqs);
+ if (node_qid == priv->htb_max_qos_sqs) {
+ struct mlx5e_sq_stats *stats, **stats_list = NULL;
+
+ if (priv->htb_max_qos_sqs == 0) {
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list),
+ GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+ }
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats) {
+ kvfree(stats_list);
+ return -ENOMEM;
+ }
+ if (stats_list)
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
+ /* Order htb_max_qos_sqs increment after writing the array pointer.
+ * Pairs with smp_load_acquire in en_stats.c.
+ */
+ smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1);
+ }
+
+ ix = node_qid % params->num_channels;
+ qid = node_qid / params->num_channels;
+ c = chs->c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = kzalloc(sizeof(*sq), GFP_KERNEL);
+
+ if (!sq)
+ return -ENOMEM;
+
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ memset(&param_sq, 0, sizeof(param_sq));
+ memset(&param_cq, 0, sizeof(param_cq));
+ mlx5e_build_sq_param(priv->mdev, params, &param_sq);
+ mlx5e_build_tx_cq_param(priv->mdev, params, &param_cq);
+ err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
+ if (err)
+ goto err_free_sq;
+ err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params,
+ &param_sq, sq, 0, hw_id,
+ priv->htb_qos_sq_stats[node_qid]);
+ if (err)
+ goto err_close_cq;
+
+ rcu_assign_pointer(qos_sqs[qid], sq);
+
+ return 0;
+
+err_close_cq:
+ mlx5e_close_cq(&sq->cq);
+err_free_sq:
+ kfree(sq);
+ return err;
+}
+
+static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id)
+{
+ struct qos_sq_callback_params *cb_params = data;
+
+ return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id);
+}
+
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_priv *priv = data;
+ struct mlx5e_txqsq *sq;
+ u16 qid;
+
+ sq = mlx5e_get_qos_sq(priv, node_qid);
+
+ qid = mlx5e_qid_from_qos(&priv->channels, node_qid);
+
+ /* If it's a new queue, it will be marked as started at this point.
+ * Stop it before updating txq2sq.
+ */
+ mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
+
+ priv->txq2sq[qid] = sq;
+
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+
+ qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid);
+ mlx5e_activate_txqsq(sq);
+
+ return 0;
+}
+
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_get_qos_sq(priv, qid);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+
+ /* Make the change to txq2sq visible before the queue is started again.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+}
+
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+ int ix;
+
+ params = &priv->channels.params;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock));
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+}
+
+void mlx5e_qos_close_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock));
+ if (!qos_sqs)
+ return;
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+ }
+
+ kvfree(qos_sqs);
+}
+
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_close_queues(chs->c[i]);
+}
+
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ u16 qos_sqs_size;
+ int i;
+
+ qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num);
+
+ for (i = 0; i < chs->num; i++) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL);
+ if (!sqs)
+ goto err_free;
+
+ WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size);
+ smp_wmb(); /* Pairs with mlx5e_napi_poll. */
+ rcu_assign_pointer(chs->c[i]->qos_sqs, sqs);
+ }
+
+ return 0;
+
+err_free:
+ while (--i >= 0) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL,
+ lockdep_is_held(&priv->state_lock));
+
+ synchronize_rcu(); /* Sync with NAPI. */
+ kvfree(sqs);
+ }
+ return -ENOMEM;
+}
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ struct qos_sq_callback_params callback_params;
+ int err;
+
+ err = mlx5e_qos_alloc_queues(priv, chs);
+ if (err)
+ return err;
+
+ callback_params.priv = priv;
+ callback_params.chs = chs;
+
+ err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params);
+ if (err) {
+ mlx5e_qos_close_all_queues(chs);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv)
+{
+ mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv);
+}
+
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_params *params = &c->priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs);
+ if (!qos_sqs)
+ return;
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ u16 qid = params->num_channels * i + c->ix;
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ /* The queue is disabled, no synchronization with datapath is needed. */
+ c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL;
+ }
+}
+
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_deactivate_queues(chs->c[i]);
+}
+
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
+{
+ qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid);
+ netdev_tx_reset_queue(txq);
+ netif_tx_start_queue(txq);
+}
+
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
+{
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid);
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+ if (!qdisc)
+ return;
+
+ spin_lock_bh(qdisc_lock(qdisc));
+ qdisc_reset(qdisc);
+ spin_unlock_bh(qdisc_lock(qdisc));
+}
+
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt)
+{
+ struct mlx5e_htb *htb = priv->htb;
+ int res;
+
+ if (!htb && htb_qopt->command != TC_HTB_CREATE)
+ return -EINVAL;
+
+ switch (htb_qopt->command) {
+ case TC_HTB_CREATE:
+ if (!mlx5_qos_is_supported(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(htb_qopt->extack,
+ "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
+ }
+ priv->htb = mlx5e_htb_alloc();
+ htb = priv->htb;
+ if (!htb)
+ return -ENOMEM;
+ res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv);
+ if (res) {
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ }
+ return res;
+ case TC_HTB_DESTROY:
+ mlx5e_htb_cleanup(htb);
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ return 0;
+ case TC_HTB_LEAF_ALLOC_QUEUE:
+ res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ case TC_HTB_LEAF_TO_INNER:
+ return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL:
+ return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL_LAST:
+ case TC_HTB_LEAF_DEL_LAST_FORCE:
+ return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid,
+ htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE,
+ htb_qopt->extack);
+ case TC_HTB_NODE_MODIFY:
+ return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil,
+ htb_qopt->extack);
+ case TC_HTB_LEAF_QUERY_QUEUE:
+ res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+struct mlx5e_mqprio_rl {
+ struct mlx5_core_dev *mdev;
+ u32 root_id;
+ u32 *leaves_id;
+ u8 num_tc;
+};
+
+struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_mqprio_rl), GFP_KERNEL);
+}
+
+void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl)
+{
+ kvfree(rl);
+}
+
+int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc,
+ u64 max_rate[])
+{
+ int err;
+ int tc;
+
+ if (!mlx5_qos_is_supported(mdev)) {
+ qos_warn(mdev, "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
+ }
+ if (num_tc > mlx5e_qos_max_leaf_nodes(mdev))
+ return -EINVAL;
+
+ rl->mdev = mdev;
+ rl->num_tc = num_tc;
+ rl->leaves_id = kvcalloc(num_tc, sizeof(*rl->leaves_id), GFP_KERNEL);
+ if (!rl->leaves_id)
+ return -ENOMEM;
+
+ err = mlx5_qos_create_root_node(mdev, &rl->root_id);
+ if (err)
+ goto err_free_leaves;
+
+ qos_dbg(mdev, "Root created, id %#x\n", rl->root_id);
+
+ for (tc = 0; tc < num_tc; tc++) {
+ u32 max_average_bw;
+
+ max_average_bw = mlx5e_qos_bytes2mbits(mdev, max_rate[tc]);
+ err = mlx5_qos_create_leaf_node(mdev, rl->root_id, 0, max_average_bw,
+ &rl->leaves_id[tc]);
+ if (err)
+ goto err_destroy_leaves;
+
+ qos_dbg(mdev, "Leaf[%d] created, id %#x, max average bw %u Mbits/sec\n",
+ tc, rl->leaves_id[tc], max_average_bw);
+ }
+ return 0;
+
+err_destroy_leaves:
+ while (--tc >= 0)
+ mlx5_qos_destroy_node(mdev, rl->leaves_id[tc]);
+ mlx5_qos_destroy_node(mdev, rl->root_id);
+err_free_leaves:
+ kvfree(rl->leaves_id);
+ return err;
+}
+
+void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl)
+{
+ int tc;
+
+ for (tc = 0; tc < rl->num_tc; tc++)
+ mlx5_qos_destroy_node(rl->mdev, rl->leaves_id[tc]);
+ mlx5_qos_destroy_node(rl->mdev, rl->root_id);
+ kvfree(rl->leaves_id);
+}
+
+int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id)
+{
+ if (tc >= rl->num_tc)
+ return -EINVAL;
+
+ *hw_id = rl->leaves_id[tc];
+ return 0;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
new file mode 100644
index 000000000..4947afa23
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_QOS_H
+#define __MLX5E_EN_QOS_H
+
+#include <linux/mlx5/driver.h>
+
+#define BYTES_IN_MBIT 125000
+
+struct mlx5e_priv;
+struct mlx5e_htb;
+struct mlx5e_channels;
+struct mlx5e_channel;
+struct tc_htb_qopt_offload;
+
+int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes);
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+
+/* SQ lifecycle */
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id);
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id);
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq);
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid);
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs);
+void mlx5e_qos_close_queues(struct mlx5e_channel *c);
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs);
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+/* TX datapath API */
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid);
+
+/* HTB API */
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb);
+
+/* MQPRIO TX rate limit */
+struct mlx5e_mqprio_rl;
+struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void);
+void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl);
+int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc,
+ u64 max_rate[]);
+void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl);
+int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000..b6f5c1bcd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+ struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct list_head metadata_list;
+};
+
+struct mlx5e_rep_bond_slave_entry {
+ struct list_head list;
+ struct net_device *netdev;
+};
+
+struct mlx5e_rep_bond_metadata {
+ struct list_head list; /* link to global list of rep_bond_metadata */
+ struct mlx5_eswitch *esw;
+ /* private of uplink holding rep bond metadata list */
+ struct net_device *lag_dev;
+ u32 metadata_reg_c_0;
+
+ struct list_head slaves_list; /* slaves list */
+ int slaves;
+};
+
+static struct mlx5e_rep_bond_metadata *
+mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_metadata *found = NULL;
+ struct mlx5e_rep_bond_metadata *cur;
+
+ list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
+ if (cur->lag_dev == lag_dev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct mlx5e_rep_bond_slave_entry *
+mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
+ const struct net_device *netdev)
+{
+ struct mlx5e_rep_bond_slave_entry *found = NULL;
+ struct mlx5e_rep_bond_slave_entry *cur;
+
+ list_for_each_entry(cur, &mdata->slaves_list, list) {
+ if (cur->netdev == netdev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
+{
+ netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ list_del(&mdata->list);
+ mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
+ WARN_ON(!list_empty(&mdata->slaves_list));
+ kfree(mdata);
+}
+
+/* This must be called under rtnl_lock */
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+ int err;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata) {
+ /* First netdev becomes slave, no metadata presents the lag_dev. Create one */
+ mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+ if (!mdata)
+ return -ENOMEM;
+
+ mdata->lag_dev = lag_dev;
+ mdata->esw = esw;
+ INIT_LIST_HEAD(&mdata->slaves_list);
+ mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
+ if (!mdata->metadata_reg_c_0) {
+ kfree(mdata);
+ return -ENOSPC;
+ }
+ list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
+
+ netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ }
+
+ s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
+ if (!s_entry) {
+ err = -ENOMEM;
+ goto entry_alloc_err;
+ }
+
+ s_entry->netdev = netdev;
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
+ mdata->metadata_reg_c_0);
+ if (err)
+ goto ingress_err;
+
+ mdata->slaves++;
+ list_add_tail(&s_entry->list, &mdata->slaves_list);
+ netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ return 0;
+
+ingress_err:
+ kfree(s_entry);
+entry_alloc_err:
+ if (!mdata->slaves)
+ mlx5e_rep_bond_metadata_release(mdata);
+ return err;
+}
+
+/* This must be called under rtnl_lock */
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata)
+ return;
+
+ s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
+ if (!s_entry)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ /* Reset bond_metadata to zero first then reset all ingress/egress
+ * acls and rx rules of unslave representor's vport
+ */
+ mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+ mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
+ mlx5e_rep_bond_update(priv, false);
+
+ list_del(&s_entry->list);
+
+ netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ if (--mdata->slaves == 0)
+ mlx5e_rep_bond_metadata_release(mdata);
+ kfree(s_entry);
+}
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct netdev_lag_lower_state_info *lag_info;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+ struct list_head *iter;
+ struct net_device *dev;
+ u16 acl_vport_num;
+ u16 fwd_vport_num;
+ int err;
+
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+ if (!lag_info->tx_enabled)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ fwd_vport_num = rpriv->rep->vport;
+ lag_dev = netdev_master_upper_dev_get(netdev);
+ if (!lag_dev)
+ return;
+
+ netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+ lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+ /* Point everyone's egress acl to the vport of the active representor */
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ acl_vport_num = rpriv->rep->vport;
+ if (acl_vport_num != fwd_vport_num) {
+ /* Only single rx_rule for unique bond_metadata should be
+ * present, delete it if it's saved as passive vport's
+ * rx_rule with destination as passive vport's root_ft
+ */
+ mlx5e_rep_bond_update(priv, true);
+ err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+ fwd_vport_num,
+ acl_vport_num);
+ if (err)
+ netdev_warn(dev,
+ "configure slave vport(%d) egress fwd, err(%d)",
+ acl_vport_num, err);
+ }
+ }
+
+ /* Insert new rx_rule for unique bond_metadata, save it as active vport's
+ * rx_rule with new destination as active vport's root_ft
+ */
+ err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
+ if (err)
+ netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
+ fwd_vport_num, err);
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+
+ netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
+ info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
+
+ if (info->linking)
+ mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+ else
+ mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+}
+
+/* Bond device of representors and netdev events are used here in specific way
+ * to support eswitch vports bonding and to perform failover of eswitch vport
+ * by modifying the vport's egress acl of lower dev representors. Thus this
+ * also change the traditional behavior of lower dev under bond device.
+ * All non-representor netdevs or representors of other vendors as lower dev
+ * of bond device are not supported.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_bond *bond;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return NOTIFY_DONE;
+
+ bond = container_of(nb, struct mlx5e_rep_bond, nb);
+ priv = netdev_priv(netdev);
+ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
+ /* Verify VF representor is on the same device of the bond handling the netevent. */
+ if (rpriv->uplink_priv.bond != bond)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ mlx5e_rep_changelowerstate_event(netdev, ptr);
+ break;
+ case NETDEV_CHANGEUPPER:
+ mlx5e_rep_changeupper_event(netdev, ptr);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/* If HW support eswitch vports bonding, register a specific notifier to
+ * handle it when two or more representors are bonded
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv;
+ int ret = 0;
+
+ priv = netdev_priv(netdev);
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+ goto out;
+
+ uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+ if (!uplink_priv->bond) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
+ uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+ ret = register_netdevice_notifier_dev_net(netdev,
+ &uplink_priv->bond->nb,
+ &uplink_priv->bond->nn);
+ if (ret) {
+ netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+ kvfree(uplink_priv->bond);
+ uplink_priv->bond = NULL;
+ }
+
+out:
+ return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+ !rpriv->uplink_priv.bond)
+ return;
+
+ unregister_netdevice_notifier_dev_net(rpriv->netdev,
+ &rpriv->uplink_priv.bond->nb,
+ &rpriv->uplink_priv.bond->nn);
+ kvfree(rpriv->uplink_priv.bond);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
new file mode 100644
index 000000000..ce85b48d3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <net/netevent.h>
+#include <net/switchdev.h>
+#include "bridge.h"
+#include "esw/bridge.h"
+#include "en_rep.h"
+
+#define MLX5_ESW_BRIDGE_UPDATE_INTERVAL 1000
+
+struct mlx5_bridge_switchdev_fdb_work {
+ struct work_struct work;
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct net_device *dev;
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ bool add;
+};
+
+static bool mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return esw == priv->mdev->priv.eswitch;
+}
+
+static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev, *esw_mdev;
+ u64 system_guid, esw_system_guid;
+
+ mdev = priv->mdev;
+ esw_mdev = esw->dev;
+
+ system_guid = mlx5_query_nic_system_image_guid(mdev);
+ esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev);
+
+ return system_guid == esw_system_guid;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw))
+ return lower;
+ }
+
+ return NULL;
+}
+
+static struct net_device *
+mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ if (netif_is_lag_master(dev))
+ dev = mlx5_esw_bridge_lag_rep_get(dev, esw);
+
+ if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw))
+ return NULL;
+
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ *vport_num = rpriv->rep->vport;
+ *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id);
+ return dev;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+ u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev))
+ return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num,
+ esw_owner_vhca_id);
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ struct net_device *rep;
+
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num,
+ esw_owner_vhca_id);
+ if (rep)
+ return rep;
+ }
+
+ return NULL;
+}
+
+static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep,
+ struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5_esw_bridge_dev_same_esw(rep, esw))
+ return false;
+
+ priv = netdev_priv(rep);
+ mdev = priv->mdev;
+ if (netif_is_lag_master(dev))
+ return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev);
+ return true;
+}
+
+static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ netdev_nb);
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev, *rep;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ u16 vport_num, esw_owner_vhca_id;
+ struct netlink_ext_ack *extack;
+ int ifindex = upper->ifindex;
+ int err = 0;
+
+ if (!netif_is_bridge_master(upper))
+ return 0;
+
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
+ return 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+ else if (mlx5_esw_bridge_dev_same_hw(rep, esw))
+ err = info->linking ?
+ mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack) :
+ mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id,
+ br_offloads, extack);
+
+ return err;
+}
+
+static int
+mlx5_esw_bridge_changeupper_validate_netdev(void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev))
+ return 0;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (!mlx5_lag_is_active(mdev))
+ return -EAGAIN;
+ if (!mlx5_lag_is_shared_fdb(mdev))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ int err = 0;
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ err = mlx5_esw_bridge_changeupper_validate_netdev(ptr);
+ break;
+
+ case NETDEV_CHANGEUPPER:
+ err = mlx5_esw_bridge_port_changeupper(nb, ptr);
+ break;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static int
+mlx5_esw_bridge_port_obj_add(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+ const struct switchdev_obj *obj = port_obj_info->obj;
+ const struct switchdev_obj_port_vlan *vlan;
+ u16 vport_num, esw_owner_vhca_id;
+ int err;
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid,
+ vlan->flags, br_offloads, extack);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static int
+mlx5_esw_bridge_port_obj_del(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ const struct switchdev_obj *obj = port_obj_info->obj;
+ const struct switchdev_obj_port_vlan *vlan;
+ u16 vport_num, esw_owner_vhca_id;
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_obj_info->handled = true;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
+ struct switchdev_notifier_port_attr_info *port_attr_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+ const struct switchdev_attr *attr = port_attr_info->attr;
+ u16 vport_num, esw_owner_vhca_id;
+ int err = 0;
+
+ if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ return 0;
+
+ port_attr_info->handled = true;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+ if (attr->u.brport_flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flag is not supported");
+ err = -EINVAL;
+ }
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id,
+ attr->u.ageing_time, br_offloads);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+ err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id,
+ attr->u.vlan_filtering, br_offloads);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL:
+ err = mlx5_esw_bridge_vlan_proto_set(vport_num,
+ esw_owner_vhca_id,
+ attr->u.vlan_protocol,
+ br_offloads);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ nb_blk);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD:
+ err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads);
+ break;
+ case SWITCHDEV_PORT_OBJ_DEL:
+ err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads);
+ break;
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+ break;
+ default:
+ err = 0;
+ }
+
+ return notifier_from_errno(err);
+}
+
+static void
+mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work)
+{
+ dev_put(fdb_work->dev);
+ kfree(fdb_work->fdb_info.addr);
+ kfree(fdb_work);
+}
+
+static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work)
+{
+ struct mlx5_bridge_switchdev_fdb_work *fdb_work =
+ container_of(work, struct mlx5_bridge_switchdev_fdb_work, work);
+ struct switchdev_notifier_fdb_info *fdb_info =
+ &fdb_work->fdb_info;
+ struct mlx5_esw_bridge_offloads *br_offloads =
+ fdb_work->br_offloads;
+ struct net_device *dev = fdb_work->dev;
+ u16 vport_num, esw_owner_vhca_id;
+
+ rtnl_lock();
+
+ if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+ &esw_owner_vhca_id))
+ goto out;
+
+ if (fdb_work->add)
+ mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+ else
+ mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+
+out:
+ rtnl_unlock();
+ mlx5_esw_bridge_cleanup_switchdev_fdb_work(fdb_work);
+}
+
+static struct mlx5_bridge_switchdev_fdb_work *
+mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
+ struct switchdev_notifier_fdb_info *fdb_info,
+ struct mlx5_esw_bridge_offloads *br_offloads)
+{
+ struct mlx5_bridge_switchdev_fdb_work *work;
+ u8 *addr;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&work->work, mlx5_esw_bridge_switchdev_fdb_event_work);
+ memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info));
+
+ addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+ if (!addr) {
+ kfree(work);
+ return ERR_PTR(-ENOMEM);
+ }
+ ether_addr_copy(addr, fdb_info->addr);
+ work->fdb_info.addr = addr;
+
+ dev_hold(dev);
+ work->dev = dev;
+ work->br_offloads = br_offloads;
+ work->add = add;
+ return work;
+}
+
+static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+ struct mlx5_esw_bridge_offloads,
+ nb);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct mlx5_bridge_switchdev_fdb_work *work;
+ struct mlx5_eswitch *esw = br_offloads->esw;
+ struct switchdev_notifier_info *info = ptr;
+ u16 vport_num, esw_owner_vhca_id;
+ struct net_device *upper, *rep;
+
+ if (event == SWITCHDEV_PORT_ATTR_SET) {
+ int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+
+ return notifier_from_errno(err);
+ }
+
+ upper = netdev_master_upper_dev_get_rcu(dev);
+ if (!upper)
+ return NOTIFY_DONE;
+ if (!netif_is_bridge_master(upper))
+ return NOTIFY_DONE;
+
+ rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+ if (!rep)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+ mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads,
+ fdb_info);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ /* only handle the event on peers */
+ if (mlx5_esw_bridge_is_local(dev, rep, esw))
+ break;
+ fallthrough;
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+
+ work = mlx5_esw_bridge_init_switchdev_fdb_work(dev,
+ event == SWITCHDEV_FDB_ADD_TO_DEVICE,
+ fdb_info,
+ br_offloads);
+ if (IS_ERR(work)) {
+ WARN_ONCE(1, "Failed to init switchdev work, err=%ld",
+ PTR_ERR(work));
+ return notifier_from_errno(PTR_ERR(work));
+ }
+
+ queue_work(br_offloads->wq, &work->work);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static void mlx5_esw_bridge_update_work(struct work_struct *work)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads = container_of(work,
+ struct mlx5_esw_bridge_offloads,
+ update_work.work);
+
+ rtnl_lock();
+ mlx5_esw_bridge_update(br_offloads);
+ rtnl_unlock();
+
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
+}
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw =
+ mdev->priv.eswitch;
+ int err;
+
+ rtnl_lock();
+ br_offloads = mlx5_esw_bridge_init(esw);
+ rtnl_unlock();
+ if (IS_ERR(br_offloads)) {
+ esw_warn(mdev, "Failed to init esw bridge (err=%ld)\n", PTR_ERR(br_offloads));
+ return;
+ }
+
+ br_offloads->wq = alloc_ordered_workqueue("mlx5_bridge_wq", 0);
+ if (!br_offloads->wq) {
+ esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n");
+ goto err_alloc_wq;
+ }
+
+ br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event;
+ err = register_switchdev_notifier(&br_offloads->nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register switchdev notifier (err=%d)\n", err);
+ goto err_register_swdev;
+ }
+
+ br_offloads->nb_blk.notifier_call = mlx5_esw_bridge_event_blocking;
+ err = register_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ if (err) {
+ esw_warn(mdev, "Failed to register blocking switchdev notifier (err=%d)\n", err);
+ goto err_register_swdev_blk;
+ }
+
+ br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+ err);
+ goto err_register_netdev;
+ }
+ INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work);
+ queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+ msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
+ return;
+
+err_register_netdev:
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+err_register_swdev_blk:
+ unregister_switchdev_notifier(&br_offloads->nb);
+err_register_swdev:
+ destroy_workqueue(br_offloads->wq);
+err_alloc_wq:
+ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
+}
+
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5_esw_bridge_offloads *br_offloads;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw =
+ mdev->priv.eswitch;
+
+ br_offloads = esw->br_offloads;
+ if (!br_offloads)
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ unregister_switchdev_notifier(&br_offloads->nb);
+ destroy_workqueue(br_offloads->wq);
+ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
new file mode 100644
index 000000000..fbeb64242
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_BRIDGE__
+#define __MLX5_EN_REP_BRIDGE__
+
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_MLX5_BRIDGE)
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv);
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_BRIDGE */
+
+static inline void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_BRIDGE */
+
+#endif /* __MLX5_EN_REP_BRIDGE__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
new file mode 100644
index 000000000..2e9bee4e5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+#include "neigh.h"
+#include "tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+#include "diag/en_rep_tracepoint.h"
+
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+ if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+ return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
+ return ~0UL;
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+ unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
+}
+
+static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ return refcount_inc_not_zero(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt)) {
+ mlx5e_rep_neigh_entry_remove(nhe);
+ kfree_rcu(nhe, rcu);
+ }
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh_hash_entry *next = NULL;
+
+ rcu_read_lock();
+
+ for (next = nhe ?
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &nhe->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list) :
+ list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list);
+ next;
+ next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &next->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list))
+ if (mlx5e_rep_neigh_entry_hold(next))
+ break;
+
+ rcu_read_unlock();
+
+ if (nhe)
+ mlx5e_rep_neigh_entry_release(nhe);
+
+ return next;
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+struct neigh_update_work {
+ struct work_struct work;
+ struct neighbour *n;
+ struct mlx5e_neigh_hash_entry *nhe;
+};
+
+static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
+{
+ neigh_release(update_work->n);
+ mlx5e_rep_neigh_entry_release(update_work->nhe);
+ kfree(update_work);
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
+ work);
+ struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+ struct neighbour *n = update_work->n;
+ struct mlx5e_encap_entry *e = NULL;
+ bool neigh_connected, same_dev;
+ unsigned char ha[ETH_ALEN];
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and it's hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+
+ if (!same_dev)
+ goto out;
+
+ /* mlx5e_get_next_init_encap() releases previous encap before returning
+ * the next one.
+ */
+ while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
+ mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
+
+out:
+ rtnl_unlock();
+ mlx5e_release_neigh_update_work(update_work);
+}
+
+static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
+ struct neighbour *n)
+{
+ struct neigh_update_work *update_work;
+ struct mlx5e_neigh_hash_entry *nhe;
+ struct mlx5e_neigh m_neigh = {};
+
+ update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
+ if (WARN_ON(!update_work))
+ return NULL;
+
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ /* Obtain reference to nhe as last step in order not to release it in
+ * atomic context.
+ */
+ rcu_read_lock();
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ rcu_read_unlock();
+ if (!nhe) {
+ kfree(update_work);
+ return NULL;
+ }
+
+ INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
+ neigh_hold(n);
+ update_work->n = n;
+ update_work->nhe = nhe;
+
+ return update_work;
+}
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct neigh_update_work *update_work;
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ update_work = mlx5e_alloc_neigh_update_work(priv, n);
+ if (!update_work)
+ return NOTIFY_DONE;
+
+ queue_work(priv->wq, &update_work->work);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check the device is present since we don't care about
+ * changes in the default table, we only care about changes
+ * done per device delay prob time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
+ neigh_list) {
+ if (p->dev == READ_ONCE(nhe->neigh_dev)) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ goto out_err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ mutex_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&neigh_update->netevent_nb);
+ if (err)
+ goto out_notifier;
+ return 0;
+
+out_notifier:
+ neigh_update->netevent_nb.notifier_call = NULL;
+ rhashtable_destroy(&neigh_update->neigh_ht);
+out_err:
+ netdev_warn(rpriv->netdev,
+ "Failed to initialize neighbours handling for vport %d\n",
+ rpriv->rep->vport);
+ return err;
+}
+
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!rpriv->neigh_update.netevent_nb.notifier_call)
+ return;
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ mutex_destroy(&neigh_update->encap_lock);
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+
+ list_del_rcu(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under the representor's encap_lock or
+ * inside rcu read lock section.
+ */
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+ return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
+}
+
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ (*nhe)->priv = priv;
+ memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
+ spin_lock_init(&(*nhe)->encap_list_lock);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+ WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
new file mode 100644
index 000000000..6fe0ab970
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_NEIGH__
+#define __MLX5_EN_REP_NEIGH__
+
+#include "en.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv);
+
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
+ struct mlx5e_neigh_hash_entry **nhe);
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline int
+mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_NEIGH__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
new file mode 100644
index 000000000..fac7e3ff2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -0,0 +1,900 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <net/dst_metadata.h>
+#include <linux/netdevice.h>
+#include <linux/if_macvlan.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include "tc.h"
+#include "neigh.h"
+#include "en_rep.h"
+#include "eswitch.h"
+#include "lib/fs_chains.h"
+#include "en/tc_ct.h"
+#include "en/mapping.h"
+#include "en/tc_tun.h"
+#include "lib/port_tun.h"
+#include "en/tc/sample.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en/tc/int_port.h"
+#include "en/tc/act/act.h"
+
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+ enum flow_block_binder_type binder_type;
+
+ struct list_head list;
+};
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+ if (err)
+ return err;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
+ if (err) {
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+ mlx5_tun_entropy_refcount_dec(tun_entropy,
+ e->reformat_type);
+ return err;
+ }
+ }
+
+ e->nhe = nhe;
+ spin_lock(&nhe->encap_list_lock);
+ list_add_rcu(&e->encap_list, &nhe->encap_list);
+ spin_unlock(&nhe->encap_list_lock);
+
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+
+ if (!e->nhe)
+ return;
+
+ spin_lock(&e->nhe->encap_list_lock);
+ list_del_rcu(&e->encap_list);
+ spin_unlock(&e->nhe->encap_list_lock);
+
+ mlx5e_rep_neigh_entry_release(e->nhe);
+ e->nhe = NULL;
+ mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
+}
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool encap_connected;
+ LIST_HEAD(flow_list);
+
+ ASSERT_RTNL();
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
+ goto unlock;
+
+ mlx5e_take_all_encap_flows(e, &flow_list);
+
+ if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+ (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
+ mlx5e_tc_encap_flows_del(priv, e, &flow_list);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ struct net_device *route_dev;
+
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+ /* Update the encap source mac, in case that we delete
+ * the flows when encap source mac changed.
+ */
+ route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
+ if (route_dev)
+ ether_addr_copy(eth->h_source, route_dev->dev_addr);
+
+ mlx5e_tc_encap_flows_add(priv, e, &flow_list);
+ }
+unlock:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ mlx5e_put_flow_list(priv, &flow_list);
+}
+
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower, int flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ switch (ma->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlx5e_tc_configure_matchall(priv, ma);
+ case TC_CLSMATCHALL_DESTROY:
+ return mlx5e_tc_delete_matchall(priv, ma);
+ case TC_CLSMATCHALL_STATS:
+ mlx5e_tc_stats_matchall(priv, ma);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!priv->netdev || !netif_device_present(priv->netdev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+ case TC_SETUP_CLSMATCHALL:
+ return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload tmp, *f = type_data;
+ struct mlx5e_priv *priv = cb_priv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ flags = MLX5_TC_FLAG(INGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+ esw = priv->mdev->priv.eswitch;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ if (!mlx5_chains_prios_supported(esw_chains(esw)))
+ return -EOPNOTSUPP;
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
+ return -EOPNOTSUPP;
+ if (tmp.common.chain_index != 0)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
+ tmp.common.prio++;
+ err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
+static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct flow_block_offload *f = type_data;
+
+ f->unlocked_driver_cb = true;
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_tc_cb_list,
+ mlx5e_rep_setup_tc_cb,
+ priv, priv, true);
+ case TC_SETUP_FT:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_ft_cb_list,
+ mlx5e_rep_setup_ft_cb,
+ priv, priv, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ int err;
+
+ mutex_init(&uplink_priv->unready_flows_lock);
+ INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(uplink_priv);
+ return err;
+}
+
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
+ mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+}
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+ mlx5e_tc_reoffload_flows_work);
+}
+
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+}
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+
+ return NOTIFY_OK;
+}
+
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev,
+ enum flow_block_binder_type binder_type)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev &&
+ cb_priv->binder_type == binder_type)
+ return cb_priv;
+
+ return NULL;
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct flow_cls_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv,
+ unsigned long flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int err = 0;
+
+ if (!netif_device_present(indr_priv->rpriv->netdev))
+ return -EOPNOTSUPP;
+
+ switch (flower->command) {
+ case FLOW_CLS_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ?
+ MLX5_TC_FLAG(EGRESS) :
+ MLX5_TC_FLAG(INGRESS);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+ struct flow_cls_offload *f = type_data;
+ struct flow_cls_offload tmp;
+ struct mlx5e_priv *mpriv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ mpriv = netdev_priv(priv->rpriv->netdev);
+ esw = mpriv->mdev->priv.eswitch;
+
+ flags = MLX5_TC_FLAG(EGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
+ tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
+ tmp.common.chain_index)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
+ tmp.common.prio++;
+ err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void mlx5e_rep_indr_block_unbind(void *cb_priv)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->mode == MACVLAN_MODE_PASSTHRU;
+}
+
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
+ struct mlx5e_rep_priv *rpriv,
+ struct flow_block_offload *f,
+ flow_setup_cb_t *setup_cb,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool is_ovs_int_port = netif_is_ovs_master(netdev);
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+ !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
+ !is_ovs_int_port) {
+ if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
+ return -EOPNOTSUPP;
+ if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
+ netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+ f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ return -EOPNOTSUPP;
+
+ if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
+ return -EOPNOTSUPP;
+
+ if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
+ return -EOPNOTSUPP;
+
+ f->unlocked_driver_cb = true;
+ f->driver_block_list = &mlx5e_block_cb_list;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ indr_priv->binder_type = f->binder_type;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
+ mlx5e_rep_indr_block_unbind,
+ f, netdev, sch, data, rpriv,
+ cleanup);
+ if (IS_ERR(block_cb)) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ return PTR_ERR(block_cb);
+ }
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
+ if (!indr_priv)
+ return -ENOENT;
+
+ block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
+ if (!block_cb)
+ return -ENOENT;
+
+ flow_indr_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct flow_action_entry *action;
+ struct mlx5e_tc_act *act;
+ bool add = false;
+ int i;
+
+ /* There is no use case currently for more than one action (e.g. pedit).
+ * when there will be, need to handle cleaning multiple actions on err.
+ */
+ if (!flow_offload_has_one_action(&fl_act->action))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ flow_action_for_each(i, action, &fl_act->action) {
+ act = mlx5e_tc_act_get(action->id, ns_type);
+ if (!act)
+ continue;
+
+ if (!act->offload_action)
+ continue;
+
+ if (!act->offload_action(priv, fl_act, action))
+ add = true;
+ }
+
+ return add ? 0 : -EOPNOTSUPP;
+}
+
+static int
+mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->destroy_action)
+ return -EOPNOTSUPP;
+
+ return act->destroy_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->stats_action)
+ return -EOPNOTSUPP;
+
+ return act->stats_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ switch (fl_act->command) {
+ case FLOW_ACT_REPLACE:
+ return mlx5e_rep_indr_replace_act(rpriv, fl_act);
+ case FLOW_ACT_DESTROY:
+ return mlx5e_rep_indr_destroy_act(rpriv, fl_act);
+ case FLOW_ACT_STATS:
+ return mlx5e_rep_indr_stats_act(rpriv, fl_act);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv,
+ enum tc_setup_type type,
+ void *data)
+{
+ if (!data)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_ACT:
+ return mlx5e_rep_indr_setup_act(rpriv, data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static
+int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
+ enum tc_setup_type type, void *type_data,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ if (!netdev)
+ return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data);
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
+ mlx5e_rep_indr_setup_tc_cb,
+ data, cleanup);
+ case TC_SETUP_FT:
+ return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
+ mlx5e_rep_indr_setup_ft_cb,
+ data, cleanup);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+
+ return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
+}
+
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
+{
+ flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
+ mlx5e_rep_indr_block_unbind);
+}
+
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv,
+ u32 tunnel_id)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct tunnel_match_enc_opts enc_opts = {};
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct metadata_dst *tun_dst;
+ struct tunnel_match_key key;
+ u32 tun_id, enc_opts_id;
+ struct net_device *dev;
+ int err;
+
+ enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+ tun_id = tunnel_id >> ENC_OPTS_BITS;
+
+ if (!tun_id)
+ return true;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+ if (err) {
+ WARN_ON_ONCE(true);
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel for tun_id: %d, err: %d\n",
+ tun_id, err);
+ return false;
+ }
+
+ if (enc_opts_id) {
+ err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id, &enc_opts);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
+ enc_opts_id, err);
+ return false;
+ }
+ }
+
+ if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ key.enc_tp.dst, TUNNEL_KEY,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ enc_opts.key.len);
+ } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ key.enc_tp.dst, 0, TUNNEL_KEY,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ enc_opts.key.len);
+ } else {
+ netdev_dbg(priv->netdev,
+ "Couldn't restore tunnel, unsupported addr_type: %d\n",
+ key.enc_control.addr_type);
+ return false;
+ }
+
+ if (!tun_dst) {
+ netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
+ return false;
+ }
+
+ tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
+
+ if (enc_opts.key.len)
+ ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
+ enc_opts.key.data,
+ enc_opts.key.len,
+ enc_opts.key.dst_opt_type);
+
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+ dev = dev_get_by_index(&init_net, key.filter_ifindex);
+ if (!dev) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel device with ifindex: %d\n",
+ key.filter_ifindex);
+ return false;
+ }
+
+ /* Set fwd_dev so we do dev_put() after datapath */
+ tc_priv->fwd_dev = dev;
+
+ skb->dev = dev;
+
+ return true;
+}
+
+static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
+ u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (chain) {
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct tc_skb_ext *tc_skb_ext;
+ struct mlx5_eswitch *esw;
+ u32 zone_restore_id;
+
+ tc_skb_ext = tc_skb_ext_alloc(skb);
+ if (!tc_skb_ext) {
+ WARN_ON(1);
+ return false;
+ }
+ tc_skb_ext->chain = chain;
+ zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
+ esw = priv->mdev->priv.eswitch;
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
+ zone_restore_id))
+ return false;
+ }
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+}
+
+static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (tc_priv->fwd_dev)
+ dev_put(tc_priv->fwd_dev);
+}
+
+static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
+ netdev_dbg(priv->netdev,
+ "Failed to restore tunnel info for sampled packet\n");
+ return;
+ }
+ mlx5e_tc_sample_skb(skb, mapped_obj);
+ mlx5_rep_tc_post_napi_receive(tc_priv);
+}
+
+static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv,
+ bool *forward_tx,
+ u32 reg_c1)
+{
+ u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ /* Tunnel restore takes precedence over int port restore */
+ if (tunnel_id)
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
+ mapped_obj->int_port_metadata, forward_tx)) {
+ /* Set fwd_dev for future dev_put */
+ tc_priv->fwd_dev = skb->dev;
+
+ return true;
+ }
+
+ return false;
+}
+
+void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb)
+{
+ u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
+ struct mlx5e_tc_update_priv tc_priv = {};
+ struct mlx5_mapped_obj mapped_obj;
+ struct mlx5_eswitch *esw;
+ bool forward_tx = false;
+ struct mlx5e_priv *priv;
+ u32 reg_c0;
+ int err;
+
+ reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
+ if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
+ goto forward;
+
+ /* If reg_c0 is not equal to the default flow tag then skb->mark
+ * is not supported and must be reset back to 0.
+ */
+ skb->mark = 0;
+
+ priv = netdev_priv(skb->dev);
+ esw = priv->mdev->priv.eswitch;
+ err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find mapped object for reg_c0: %d, err: %d\n",
+ reg_c0, err);
+ goto free_skb;
+ }
+
+ if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+ if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
+ !mlx5_ipsec_is_rx_flow(cqe))
+ goto free_skb;
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
+ mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
+ goto free_skb;
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
+ if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
+ &forward_tx, reg_c1))
+ goto free_skb;
+ } else {
+ netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
+ goto free_skb;
+ }
+
+forward:
+ if (forward_tx)
+ dev_queue_xmit(skb);
+ else
+ napi_gro_receive(rq->cq.napi, skb);
+
+ mlx5_rep_tc_post_napi_receive(&tc_priv);
+
+ return;
+
+free_skb:
+ dev_kfree_skb_any(skb);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
new file mode 100644
index 000000000..7c9dd3a75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_TC_H__
+#define __MLX5_EN_REP_TC_H__
+
+#include <linux/skbuff.h>
+#include "en_tc.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv);
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv);
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv);
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv);
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv);
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN]);
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
+
+void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5e_rep_priv;
+static inline int
+mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+static inline int
+mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {}
+
+static inline void
+mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {}
+static inline void
+mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {}
+
+static inline int
+mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; }
+
+static inline int
+mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data) { return -EOPNOTSUPP; }
+
+static inline void
+mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
+ struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); }
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
new file mode 100644
index 000000000..9b1f1369a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -0,0 +1,759 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "params.h"
+#include "txrx.h"
+#include "devlink.h"
+#include "ptp.h"
+#include "lib/tout.h"
+
+static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ void *out;
+ void *rqc;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rq(dev, rqn, out);
+ if (err)
+ goto out;
+
+ rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+ *state = MLX5_GET(rqc, rqc, state);
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
+{
+ struct mlx5_core_dev *dev = icosq->channel->mdev;
+ unsigned long exp_time;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
+
+ while (time_before(jiffies, exp_time)) {
+ if (icosq->cc == icosq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(icosq->channel->netdev,
+ "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n",
+ icosq->sqn, icosq->cc, icosq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
+{
+ WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ icosq->sqn, icosq->cc, icosq->pc);
+ icosq->cc = 0;
+ icosq->pc = 0;
+}
+
+static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+{
+ struct mlx5e_rq *xskrq = NULL;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_icosq *icosq;
+ struct net_device *dev;
+ struct mlx5e_rq *rq;
+ u8 state;
+ int err;
+
+ icosq = ctx;
+
+ mutex_lock(&icosq->channel->icosq_recovery_lock);
+
+ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
+ rq = &icosq->channel->rq;
+ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
+ xskrq = &icosq->channel->xskrq;
+ mdev = icosq->channel->mdev;
+ dev = icosq->channel->netdev;
+ err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n",
+ icosq->sqn, err);
+ goto out;
+ }
+
+ if (state != MLX5_SQC_STATE_ERR)
+ goto out;
+
+ mlx5e_deactivate_rq(rq);
+ if (xskrq)
+ mlx5e_deactivate_rq(xskrq);
+
+ err = mlx5e_wait_for_icosq_flush(icosq);
+ if (err)
+ goto out;
+
+ mlx5e_deactivate_icosq(icosq);
+
+ /* At this point, both the rq and the icosq are disabled */
+
+ err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn);
+ if (err)
+ goto out;
+
+ mlx5e_reset_icosq_cc_pc(icosq);
+
+ mlx5e_free_rx_in_progress_descs(rq);
+ if (xskrq)
+ mlx5e_free_rx_in_progress_descs(xskrq);
+
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mlx5e_activate_icosq(icosq);
+
+ mlx5e_activate_rq(rq);
+ rq->stats->recover++;
+
+ if (xskrq) {
+ mlx5e_activate_rq(xskrq);
+ xskrq->stats->recover++;
+ }
+
+ mlx5e_trigger_napi_icosq(icosq->channel);
+
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+
+ return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+ return err;
+}
+
+static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
+{
+ struct mlx5e_rq *rq = ctx;
+ int err;
+
+ mlx5e_deactivate_rq(rq);
+ err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
+ clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+ if (err)
+ return err;
+
+ mlx5e_activate_rq(rq);
+ rq->stats->recover++;
+ if (rq->channel)
+ mlx5e_trigger_napi_icosq(rq->channel);
+ else
+ mlx5e_trigger_napi_sched(rq->cq.napi);
+ return 0;
+}
+
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+{
+ struct mlx5_eq_comp *eq;
+ struct mlx5e_rq *rq;
+ int err;
+
+ rq = ctx;
+ eq = rq->cq.mcq.eq;
+
+ err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
+ if (err && rq->icosq)
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);
+
+ return err;
+}
+
+static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
+{
+ return err_ctx->recover(err_ctx->ctx);
+}
+
+static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
+ void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) :
+ mlx5e_health_recover_channels(priv);
+}
+
+static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "WQE size",
+ mlx5_wq_cyc_get_size(&icosq->wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq));
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ u16 wqe_counter;
+ int wqes_sz;
+ u8 hw_state;
+ u16 wq_head;
+ int err;
+
+ err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state);
+ if (err)
+ return err;
+
+ wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
+ wq_head = mlx5e_rqwq_get_head(rq);
+ wqe_counter = mlx5e_rqwq_get_wqe_counter(rq);
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head);
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg);
+ if (err)
+ return err;
+
+ if (rq->icosq) {
+ struct mlx5e_icosq *icosq = rq->icosq;
+ u8 icosq_hw_state;
+
+ err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state);
+ if (err)
+ return err;
+
+ err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix);
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_params *params;
+ u32 rq_stride, rq_sz;
+ bool real_time;
+ int err;
+
+ params = &priv->channels.params;
+ rq_sz = mlx5e_rqwq_get_size(rq);
+ real_time = mlx5_is_real_time_rq(priv->mdev);
+ rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter);
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg);
+ if (err)
+ return err;
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg);
+ if (err)
+ return err;
+ }
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
+ struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
+ if (err)
+ return err;
+
+ err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ int i, err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg);
+ if (err)
+ goto unlock;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
+ if (err)
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5e_rq *rq;
+
+ rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ?
+ &c->xskrq : &c->rq;
+
+ err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
+ if (err)
+ goto unlock;
+ }
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg);
+ if (err)
+ goto unlock;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_txqsq *icosq = ctx;
+ struct mlx5_rsc_key key = {};
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = icosq->sqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5_rsc_key key = {};
+ struct mlx5e_rq *rq = ctx;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = rq->rqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_RCV_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5_rsc_key key = {};
+ int i, err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
+ if (err)
+ return err;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
+
+ err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ");
+ if (err)
+ return err;
+ }
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+ err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ");
+ if (err)
+ return err;
+ }
+
+ return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
+ struct mlx5e_err_ctx *err_ctx,
+ struct devlink_fmsg *fmsg)
+{
+ return err_ctx->dump(priv, fmsg, err_ctx->ctx);
+}
+
+static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
+ mlx5e_rx_reporter_dump_all_rqs(priv, fmsg);
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+ char icosq_str[32] = {};
+
+ err_ctx.ctx = rq;
+ err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_rq;
+
+ if (icosq)
+ snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
+ snprintf(err_str, sizeof(err_str),
+ "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
+ rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_priv *priv = rq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = rq;
+ err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_rq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
+{
+ struct mlx5e_priv *priv = icosq->channel->priv;
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = icosq;
+ err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
+ err_ctx.dump = mlx5e_rx_reporter_dump_icosq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn);
+
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
+{
+ mutex_lock(&c->icosq_recovery_lock);
+}
+
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
+{
+ mutex_unlock(&c->icosq_recovery_lock);
+}
+
+static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
+ .name = "rx",
+ .recover = mlx5e_rx_reporter_recover,
+ .diagnose = mlx5e_rx_reporter_diagnose,
+ .dump = mlx5e_rx_reporter_dump,
+};
+
+#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
+
+void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops,
+ MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
+ if (IS_ERR(reporter)) {
+ netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
+ PTR_ERR(reporter));
+ return;
+ }
+ priv->rx_reporter = reporter;
+}
+
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
+{
+ if (!priv->rx_reporter)
+ return;
+
+ devlink_port_health_reporter_destroy(priv->rx_reporter);
+ priv->rx_reporter = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
new file mode 100644
index 000000000..60bc5b577
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -0,0 +1,614 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "health.h"
+#include "en/ptp.h"
+#include "en/devlink.h"
+#include "lib/tout.h"
+
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_core_dev *dev = sq->mdev;
+ unsigned long exp_time;
+
+ exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
+
+ while (time_before(jiffies, exp_time)) {
+ if (sq->cc == sq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(sq->netdev,
+ "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+ WARN_ONCE(sq->cc != sq->pc,
+ "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+ sq->cc = 0;
+ sq->dma_fifo_cc = 0;
+ sq->pc = 0;
+}
+
+static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
+{
+ struct mlx5_core_dev *mdev;
+ struct net_device *dev;
+ struct mlx5e_txqsq *sq;
+ u8 state;
+ int err;
+
+ sq = ctx;
+ mdev = sq->mdev;
+ dev = sq->netdev;
+
+ if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+ return 0;
+
+ err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+ sq->sqn, err);
+ goto out;
+ }
+
+ if (state != MLX5_SQC_STATE_ERR)
+ goto out;
+
+ mlx5e_tx_disable_queue(sq->txq);
+
+ err = mlx5e_wait_for_sq_flush(sq);
+ if (err)
+ goto out;
+
+ /* At this point, no new packets will arrive from the stack as TXQ is
+ * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+ * pending WQEs. SQ can safely reset the SQ.
+ */
+
+ err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
+ if (err)
+ goto out;
+
+ mlx5e_reset_txqsq_cc_pc(sq);
+ sq->stats->recover++;
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ mlx5e_activate_txqsq(sq);
+
+ return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ return err;
+}
+
+struct mlx5e_tx_timeout_ctx {
+ struct mlx5e_txqsq *sq;
+ signed int status;
+};
+
+static int mlx5e_tx_reporter_timeout_recover(void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx;
+ struct mlx5e_priv *priv;
+ struct mlx5_eq_comp *eq;
+ struct mlx5e_txqsq *sq;
+ int err;
+
+ to_ctx = ctx;
+ sq = to_ctx->sq;
+ eq = sq->cq.mcq.eq;
+ priv = sq->priv;
+ err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
+ if (!err) {
+ to_ctx->status = 0; /* this sq recovered */
+ return err;
+ }
+
+ err = mlx5e_safe_reopen_channels(priv);
+ if (!err) {
+ to_ctx->status = 1; /* all channels recovered */
+ return err;
+ }
+
+ to_ctx->status = err;
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ netdev_err(priv->netdev,
+ "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
+ err);
+
+ return err;
+}
+
+/* state lock cannot be grabbed within this function.
+ * It can cause a dead lock or a read-after-free.
+ */
+static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
+{
+ return err_ctx->recover(err_ctx->ctx);
+}
+
+static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
+ void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
+ mlx5e_health_recover_channels(priv);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *sq, int tc)
+{
+ bool stopped = netif_xmit_stopped(sq->txq);
+ struct mlx5e_priv *priv = sq->priv;
+ u8 state;
+ int err;
+
+ err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *sq, int tc)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
+ struct mlx5e_ptpsq *ptpsq, int tc)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int
+mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
+ struct mlx5e_txqsq *txqsq)
+{
+ u32 sq_stride, sq_sz;
+ bool real_time;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
+ if (err)
+ return err;
+
+ real_time = mlx5_is_real_time_sq(txqsq->mdev);
+ sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
+ sq_stride = MLX5_SEND_WQE_BB;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
+ struct mlx5e_ptpsq *ptpsq)
+{
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
+ if (err)
+ return err;
+
+ err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5e_ptpsq *generic_ptpsq;
+ int err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
+ if (err)
+ return err;
+
+ if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
+ goto out;
+
+ generic_ptpsq = &ptp_ch->ptpsq[0];
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
+ if (err)
+ return err;
+
+ err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+out:
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+
+ int i, tc, err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
+ if (err)
+ goto unlock;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+ if (err)
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &c->sq[tc];
+
+ err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
+ if (err)
+ goto unlock;
+ }
+ }
+
+ if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
+ goto close_sqs_nest;
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
+ &ptp_ch->ptpsq[tc],
+ tc);
+ if (err)
+ goto unlock;
+ }
+
+close_sqs_nest:
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ goto unlock;
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5_rsc_key key = {};
+ struct mlx5e_txqsq *sq = ctx;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+ key.index1 = sq->sqn;
+ key.num_of_obj1 = 1;
+
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
+ if (err)
+ return err;
+
+ key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
+ key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
+
+ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
+}
+
+static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
+ struct mlx5_rsc_key key = {};
+ int i, tc, err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
+ if (err)
+ return err;
+
+ key.size = PAGE_SIZE;
+ key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+ err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+ if (err)
+ return err;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &c->sq[tc];
+
+ err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
+ if (err)
+ return err;
+ }
+ }
+
+ if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
+ for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
+ struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
+
+ err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
+ if (err)
+ return err;
+ }
+ }
+
+ return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
+ struct mlx5e_err_ctx *err_ctx,
+ struct devlink_fmsg *fmsg)
+{
+ return err_ctx->dump(priv, fmsg, err_ctx->ctx);
+}
+
+static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *context,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
+ mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
+}
+
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ err_ctx.ctx = sq;
+ err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+ err_ctx.dump = mlx5e_tx_reporter_dump_sq;
+ snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
+
+ mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+}
+
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_tx_timeout_ctx to_ctx = {};
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5e_err_ctx err_ctx = {};
+
+ to_ctx.sq = sq;
+ err_ctx.ctx = &to_ctx;
+ err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
+ snprintf(err_str, sizeof(err_str),
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
+ sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+ jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
+
+ mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+ return to_ctx.status;
+}
+
+static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
+ .name = "tx",
+ .recover = mlx5e_tx_reporter_recover,
+ .diagnose = mlx5e_tx_reporter_diagnose,
+ .dump = mlx5e_tx_reporter_dump,
+};
+
+#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
+
+void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
+{
+ struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops,
+ MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
+ if (IS_ERR(reporter)) {
+ netdev_warn(priv->netdev,
+ "Failed to create tx reporter, err = %ld\n",
+ PTR_ERR(reporter));
+ return;
+ }
+ priv->tx_reporter = reporter;
+}
+
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
+{
+ if (!priv->tx_reporter)
+ return;
+
+ devlink_port_health_reporter_destroy(priv->tx_reporter);
+ priv->tx_reporter = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
new file mode 100644
index 000000000..b915fb29d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rqt.h"
+#include <linux/mlx5/transobj.h>
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir->table[i] = i % num_channels;
+}
+
+static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u16 max_size, u32 *init_rqns, u16 init_size)
+{
+ void *rqtc;
+ int inlen;
+ int err;
+ u32 *in;
+ int i;
+
+ rqt->mdev = mdev;
+ rqt->size = max_size;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size);
+ for (i = 0; i < init_size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]);
+
+ err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn)
+{
+ u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1;
+
+ return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1);
+}
+
+static int mlx5e_bits_invert(unsigned long a, int size)
+{
+ int inv = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+ return inv;
+}
+
+static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) {
+ unsigned int ix = i;
+
+ if (hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE));
+
+ ix = indir->table[ix];
+
+ if (WARN_ON(ix >= num_rqns))
+ /* Could be a bug in the driver or in the kernel part of
+ * ethtool: indir table refers to non-existent RQs.
+ */
+ return -EINVAL;
+ rss_rqns[i] = rqns[ix];
+ }
+
+ return 0;
+}
+
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
+
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
+{
+ mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
+}
+
+static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size)
+{
+ unsigned int i;
+ void *rqtc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+
+ MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, size);
+ for (i = 0; i < size; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]);
+
+ err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn)
+{
+ return mlx5e_rqt_redirect(rqt, &rqn, 1);
+}
+
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+ u32 *rss_rqns;
+ int err;
+
+ if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE))
+ return -EINVAL;
+
+ rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+ if (!rss_rqns)
+ return -ENOMEM;
+
+ err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+ if (err)
+ goto out;
+
+ err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+ kvfree(rss_rqns);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
new file mode 100644
index 000000000..60c985a12
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RQT_H__
+#define __MLX5_EN_RQT_H__
+
+#include <linux/kernel.h>
+
+#define MLX5E_INDIR_RQT_SIZE (1 << 8)
+
+struct mlx5_core_dev;
+
+struct mlx5e_rss_params_indir {
+ u32 table[MLX5E_INDIR_RQT_SIZE];
+};
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels);
+
+struct mlx5e_rqt {
+ struct mlx5_core_dev *mdev;
+ u32 rqtn;
+ u16 size;
+};
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ bool indir_enabled, u32 init_rqn);
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+ u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt);
+
+static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
+{
+ return rqt->rqtn;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn);
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+ u8 hfunc, struct mlx5e_rss_params_indir *indir);
+
+#endif /* __MLX5_EN_RQT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
new file mode 100644
index 000000000..7f93426b8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.
+
+#include "rss.h"
+
+#define mlx5e_rss_warn(__dev, format, ...) \
+ dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5_TT_IPV4] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+ [MLX5_TT_IPV6] = {
+ .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+};
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
+{
+ return rss_default_config[tt];
+}
+
+struct mlx5e_rss {
+ struct mlx5e_rss_params_hash hash;
+ struct mlx5e_rss_params_indir indir;
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_rqt rqt;
+ struct mlx5_core_dev *mdev;
+ u32 drop_rqn;
+ bool inner_ft_support;
+ bool enabled;
+ refcount_t refcnt;
+};
+
+struct mlx5e_rss *mlx5e_rss_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_rss), GFP_KERNEL);
+}
+
+void mlx5e_rss_free(struct mlx5e_rss *rss)
+{
+ kvfree(rss);
+}
+
+static void mlx5e_rss_params_init(struct mlx5e_rss *rss)
+{
+ enum mlx5_traffic_types tt;
+
+ rss->hash.hfunc = ETH_RSS_HASH_TOP;
+ netdev_rss_key_fill(rss->hash.toeplitz_hash_key,
+ sizeof(rss->hash.toeplitz_hash_key));
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ rss->rx_hash_fields[tt] =
+ mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
+}
+
+static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ return inner ? &rss->inner_tir[tt] : &rss->tir[tt];
+}
+
+static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ return *rss_get_tirp(rss, tt, inner);
+}
+
+static struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+
+ rss_tt = mlx5e_rss_get_default_tt_config(tt);
+ rss_tt.rx_hash_fields = rss->rx_hash_fields[tt];
+ return rss_tt;
+}
+
+static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+ struct mlx5e_tir_builder *builder;
+ struct mlx5e_tir **tir_p;
+ struct mlx5e_tir *tir;
+ u32 rqtn;
+ int err;
+
+ if (inner && !rss->inner_ft_support) {
+ mlx5e_rss_warn(rss->mdev,
+ "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n",
+ tt);
+ return -EINVAL;
+ }
+
+ tir_p = rss_get_tirp(rss, tt, inner);
+ if (*tir_p)
+ return -EINVAL;
+
+ tir = kvzalloc(sizeof(*tir), GFP_KERNEL);
+ if (!tir)
+ return -ENOMEM;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder) {
+ err = -ENOMEM;
+ goto free_tir;
+ }
+
+ rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
+ mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
+ rqtn, rss->inner_ft_support);
+ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+ mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+
+ err = mlx5e_tir_init(tir, builder, rss->mdev, true);
+ mlx5e_tir_builder_free(builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n",
+ inner ? "inner " : "", err, tt);
+ goto free_tir;
+ }
+
+ *tir_p = tir;
+ return 0;
+
+free_tir:
+ kvfree(tir);
+ return err;
+}
+
+static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_tir **tir_p;
+ struct mlx5e_tir *tir;
+
+ tir_p = rss_get_tirp(rss, tt, inner);
+ if (!*tir_p)
+ return;
+
+ tir = *tir_p;
+ mlx5e_tir_destroy(tir);
+ kvfree(tir);
+ *tir_p = NULL;
+}
+
+static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+{
+ enum mlx5_traffic_types tt, max_tt;
+ int err;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ goto err_destroy_tirs;
+ }
+
+ return 0;
+
+err_destroy_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_rss_destroy_tir(rss, tt, inner);
+ return err;
+}
+
+static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner)
+{
+ enum mlx5_traffic_types tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5e_rss_destroy_tir(rss, tt, inner);
+}
+
+static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+ struct mlx5e_tir_builder *builder;
+ struct mlx5e_tir *tir;
+ int err;
+
+ tir = rss_get_tir(rss, tt, inner);
+ if (!tir)
+ return 0;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+
+ mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+ err = mlx5e_tir_modify(tir, builder);
+
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss)
+{
+ enum mlx5_traffic_types tt;
+ int err, retval;
+
+ retval = 0;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_rss_update_tir(rss, tt, false);
+ if (err) {
+ retval = retval ? : err;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ }
+
+ if (!rss->inner_ft_support)
+ continue;
+
+ err = mlx5e_rss_update_tir(rss, tt, true);
+ if (err) {
+ retval = retval ? : err;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ }
+ }
+ return retval;
+}
+
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn)
+{
+ rss->mdev = mdev;
+ rss->inner_ft_support = inner_ft_support;
+ rss->drop_rqn = drop_rqn;
+
+ mlx5e_rss_params_init(rss);
+ refcount_set(&rss->refcnt, 1);
+
+ return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn);
+}
+
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+{
+ int err;
+
+ err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn);
+ if (err)
+ goto err_out;
+
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
+ if (err)
+ goto err_destroy_rqt;
+
+ if (inner_ft_support) {
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
+ if (err)
+ goto err_destroy_tirs;
+ }
+
+ return 0;
+
+err_destroy_tirs:
+ mlx5e_rss_destroy_tirs(rss, false);
+err_destroy_rqt:
+ mlx5e_rqt_destroy(&rss->rqt);
+err_out:
+ return err;
+}
+
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss)
+{
+ if (!refcount_dec_if_one(&rss->refcnt))
+ return -EBUSY;
+
+ mlx5e_rss_destroy_tirs(rss, false);
+
+ if (rss->inner_ft_support)
+ mlx5e_rss_destroy_tirs(rss, true);
+
+ mlx5e_rqt_destroy(&rss->rqt);
+
+ return 0;
+}
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss)
+{
+ refcount_inc(&rss->refcnt);
+}
+
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss)
+{
+ refcount_dec(&rss->refcnt);
+}
+
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss)
+{
+ return refcount_read(&rss->refcnt);
+}
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_tir *tir;
+
+ WARN_ON(inner && !rss->inner_ft_support);
+ tir = rss_get_tir(rss, tt, inner);
+ WARN_ON(!tir);
+
+ return mlx5e_tir_get_tirn(tir);
+}
+
+/* Fill the "tirn" output parameter.
+ * Create the requested TIR if it's its first usage.
+ */
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn)
+{
+ struct mlx5e_tir *tir;
+
+ tir = rss_get_tir(rss, tt, inner);
+ if (!tir) { /* TIR doesn't exist, create one */
+ int err;
+
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ return err;
+ tir = rss_get_tir(rss, tt, inner);
+ }
+
+ *tirn = mlx5e_tir_get_tirn(tir);
+ return 0;
+}
+
+static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+ int err;
+
+ err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir);
+ if (err)
+ mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n",
+ mlx5e_rqt_get_rqtn(&rss->rqt), err);
+ return err;
+}
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+ rss->enabled = true;
+ mlx5e_rss_apply(rss, rqns, num_rqns);
+}
+
+void mlx5e_rss_disable(struct mlx5e_rss *rss)
+{
+ int err;
+
+ rss->enabled = false;
+ err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn);
+ if (err)
+ mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n",
+ mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
+}
+
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ struct mlx5e_tir_builder *builder;
+ enum mlx5_traffic_types tt;
+ int err, final_err;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_tir *tir;
+
+ tir = rss_get_tir(rss, tt, false);
+ if (!tir)
+ goto inner_tir;
+ err = mlx5e_tir_modify(tir, builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+
+inner_tir:
+ if (!rss->inner_ft_support)
+ continue;
+
+ tir = rss_get_tir(rss, tt, true);
+ if (!tir)
+ continue;
+ err = mlx5e_tir_modify(tir, builder);
+ if (err) {
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+}
+
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
+{
+ unsigned int i;
+
+ if (indir)
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir[i] = rss->indir.table[i];
+
+ if (key)
+ memcpy(key, rss->hash.toeplitz_hash_key,
+ sizeof(rss->hash.toeplitz_hash_key));
+
+ if (hfunc)
+ *hfunc = rss->hash.hfunc;
+
+ return 0;
+}
+
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+ u32 *rqns, unsigned int num_rqns)
+{
+ bool changed_indir = false;
+ bool changed_hash = false;
+ struct mlx5e_rss *old_rss;
+ int err = 0;
+
+ old_rss = mlx5e_rss_alloc();
+ if (!old_rss)
+ return -ENOMEM;
+
+ *old_rss = *rss;
+
+ if (hfunc && *hfunc != rss->hash.hfunc) {
+ switch (*hfunc) {
+ case ETH_RSS_HASH_XOR:
+ case ETH_RSS_HASH_TOP:
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ changed_hash = true;
+ changed_indir = true;
+ rss->hash.hfunc = *hfunc;
+ }
+
+ if (key) {
+ if (rss->hash.hfunc == ETH_RSS_HASH_TOP)
+ changed_hash = true;
+ memcpy(rss->hash.toeplitz_hash_key, key,
+ sizeof(rss->hash.toeplitz_hash_key));
+ }
+
+ if (indir) {
+ unsigned int i;
+
+ changed_indir = true;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ rss->indir.table[i] = indir[i];
+ }
+
+ if (changed_indir && rss->enabled) {
+ err = mlx5e_rss_apply(rss, rqns, num_rqns);
+ if (err) {
+ *rss = *old_rss;
+ goto out;
+ }
+ }
+
+ if (changed_hash)
+ mlx5e_rss_update_tirs(rss);
+
+out:
+ mlx5e_rss_free(old_rss);
+ return err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss)
+{
+ return rss->hash;
+}
+
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+ return rss->rx_hash_fields[tt];
+}
+
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields)
+{
+ u8 old_rx_hash_fields;
+ int err;
+
+ old_rx_hash_fields = rss->rx_hash_fields[tt];
+
+ if (old_rx_hash_fields == rx_hash_fields)
+ return 0;
+
+ rss->rx_hash_fields[tt] = rx_hash_fields;
+
+ err = mlx5e_rss_update_tir(rss, tt, false);
+ if (err) {
+ rss->rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ return err;
+ }
+
+ if (!(rss->inner_ft_support))
+ return 0;
+
+ err = mlx5e_rss_update_tir(rss, tt, true);
+ if (err) {
+ /* Partial update happened. Try to revert - it may fail too, but
+ * there is nothing more we can do.
+ */
+ rss->rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5e_rss_warn(rss->mdev,
+ "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ if (mlx5e_rss_update_tir(rss, tt, false))
+ mlx5e_rss_warn(rss->mdev,
+ "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+ tt);
+ }
+
+ return err;
+}
+
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch)
+{
+ mlx5e_rss_params_indir_init_uniform(&rss->indir, nch);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
new file mode 100644
index 000000000..c6b216416
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_RSS_H__
+#define __MLX5_EN_RSS_H__
+
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+struct mlx5e_rss;
+
+struct mlx5e_rss *mlx5e_rss_alloc(void);
+void mlx5e_rss_free(struct mlx5e_rss *rss);
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param);
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn);
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss);
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss);
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss);
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner);
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn);
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+void mlx5e_rss_disable(struct mlx5e_rss *rss);
+
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+ u32 *rqns, unsigned int num_rqns);
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss);
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch);
+#endif /* __MLX5_EN_RSS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
new file mode 100644
index 000000000..e1095bc36
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rx_res.h"
+#include "channels.h"
+#include "params.h"
+
+#define MLX5E_MAX_NUM_RSS 16
+
+struct mlx5e_rx_res {
+ struct mlx5_core_dev *mdev;
+ enum mlx5e_rx_res_features features;
+ unsigned int max_nch;
+ u32 drop_rqn;
+
+ struct mlx5e_packet_merge_param pkt_merge_param;
+ struct rw_semaphore pkt_merge_param_sem;
+
+ struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
+ bool rss_active;
+ u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+ unsigned int rss_nch;
+
+ struct {
+ struct mlx5e_rqt direct_rqt;
+ struct mlx5e_tir direct_tir;
+ } *channels;
+
+ struct {
+ struct mlx5e_rqt rqt;
+ struct mlx5e_tir tir;
+ } ptp;
+};
+
+/* API for rx_res_rss_* */
+
+static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+ unsigned int init_nch)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_rss *rss;
+ int err;
+
+ if (WARN_ON(res->rss[0]))
+ return -EINVAL;
+
+ rss = mlx5e_rss_alloc();
+ if (!rss)
+ return -ENOMEM;
+
+ err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
+ &res->pkt_merge_param);
+ if (err)
+ goto err_rss_free;
+
+ mlx5e_rss_set_indir_uniform(rss, init_nch);
+
+ res->rss[0] = rss;
+
+ return 0;
+
+err_rss_free:
+ mlx5e_rss_free(rss);
+ return err;
+}
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_rss *rss;
+ int err, i;
+
+ for (i = 1; i < MLX5E_MAX_NUM_RSS; i++)
+ if (!res->rss[i])
+ break;
+
+ if (i == MLX5E_MAX_NUM_RSS)
+ return -ENOSPC;
+
+ rss = mlx5e_rss_alloc();
+ if (!rss)
+ return -ENOMEM;
+
+ err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn);
+ if (err)
+ goto err_rss_free;
+
+ mlx5e_rss_set_indir_uniform(rss, init_nch);
+ if (res->rss_active)
+ mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+
+ res->rss[i] = rss;
+ *rss_idx = i;
+
+ return 0;
+
+err_rss_free:
+ mlx5e_rss_free(rss);
+ return err;
+}
+
+static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ struct mlx5e_rss *rss = res->rss[rss_idx];
+ int err;
+
+ err = mlx5e_rss_cleanup(rss);
+ if (err)
+ return err;
+
+ mlx5e_rss_free(rss);
+ res->rss[rss_idx] = NULL;
+
+ return 0;
+}
+
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -EINVAL;
+
+ return __mlx5e_rx_res_rss_destroy(res, rss_idx);
+}
+
+static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+ int err;
+
+ if (!rss)
+ continue;
+
+ err = __mlx5e_rx_res_rss_destroy(res, i);
+ if (err) {
+ unsigned int refcount;
+
+ refcount = mlx5e_rss_refcnt_read(rss);
+ mlx5_core_warn(res->mdev,
+ "Failed to destroy RSS context %d, refcount = %u, err = %d\n",
+ i, refcount, err);
+ }
+ }
+}
+
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ res->rss_active = true;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+
+ if (!rss)
+ continue;
+ mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+ }
+}
+
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+ int i;
+
+ res->rss_active = false;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+ struct mlx5e_rss *rss = res->rss[i];
+
+ if (!rss)
+ continue;
+ mlx5e_rss_disable(rss);
+ }
+}
+
+/* Updates the indirection table SW shadow, does not update the HW resources yet */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+ WARN_ON_ONCE(res->rss_active);
+ mlx5e_rss_set_indir_uniform(res->rss[0], nch);
+}
+
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u32 *indir, u8 *key, u8 *hfunc)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -ENOENT;
+
+ return mlx5e_rss_get_rxfh(rss, indir, key, hfunc);
+}
+
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ const u32 *indir, const u8 *key, const u8 *hfunc)
+{
+ struct mlx5e_rss *rss;
+
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return -EINVAL;
+
+ rss = res->rss[rss_idx];
+ if (!rss)
+ return -ENOENT;
+
+ return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch);
+}
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_hash_fields(rss, tt);
+}
+
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields);
+}
+
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res)
+{
+ int i, cnt;
+
+ cnt = 0;
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+ if (res->rss[i])
+ cnt++;
+
+ return cnt;
+}
+
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss)
+{
+ int i;
+
+ if (!rss)
+ return -EINVAL;
+
+ for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+ if (rss == res->rss[i])
+ return i;
+
+ return -ENOENT;
+}
+
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+ if (rss_idx >= MLX5E_MAX_NUM_RSS)
+ return NULL;
+
+ return res->rss[rss_idx];
+}
+
+/* End of API rx_res_rss_* */
+
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
+}
+
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+ int ix;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+ if (!res->channels) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+ res->mdev, false, res->drop_rqn);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_rqts;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ goto out;
+
+err_destroy_direct_tirs:
+ while (--ix >= 0)
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+ ix = res->max_nch;
+err_destroy_direct_rqts:
+ while (--ix >= 0)
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+ kvfree(res->channels);
+
+out:
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+ if (err)
+ goto out;
+
+ /* Separated from the channels RQs, does not share pkt_merge state with them */
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+ if (err)
+ goto err_destroy_ptp_rqt;
+
+ goto out;
+
+err_destroy_ptp_rqt:
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+ }
+
+ kvfree(res->channels);
+}
+
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_tir_destroy(&res->ptp.tir);
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch)
+{
+ int err;
+
+ res->mdev = mdev;
+ res->features = features;
+ res->max_nch = max_nch;
+ res->drop_rqn = drop_rqn;
+
+ res->pkt_merge_param = *init_pkt_merge_param;
+ init_rwsem(&res->pkt_merge_param_sem);
+
+ err = mlx5e_rx_res_rss_init_def(res, init_nch);
+ if (err)
+ goto err_out;
+
+ err = mlx5e_rx_res_channels_init(res);
+ if (err)
+ goto err_rss_destroy;
+
+ err = mlx5e_rx_res_ptp_init(res);
+ if (err)
+ goto err_channels_destroy;
+
+ return 0;
+
+err_channels_destroy:
+ mlx5e_rx_res_channels_destroy(res);
+err_rss_destroy:
+ __mlx5e_rx_res_rss_destroy(res, 0);
+err_out:
+ return err;
+}
+
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_rx_res_ptp_destroy(res);
+ mlx5e_rx_res_channels_destroy(res);
+ mlx5e_rx_res_rss_destroy_all(res);
+}
+
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+ kvfree(res);
+}
+
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_tirn(rss, tt, false);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ struct mlx5e_rss *rss = res->rss[0];
+
+ return mlx5e_rss_get_tirn(rss, tt, true);
+}
+
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+ WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+ return mlx5e_tir_get_tirn(&res->ptp.tir);
+}
+
+static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+}
+
+static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
+ struct mlx5e_channels *chs,
+ unsigned int ix)
+{
+ u32 rqn = res->rss_rqns[ix];
+ int err;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ rqn, ix, err);
+}
+
+static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
+ unsigned int ix)
+{
+ int err;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ res->drop_rqn, ix, err);
+}
+
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+ unsigned int nch, ix;
+ int err;
+
+ nch = mlx5e_channels_get_num(chs);
+
+ for (ix = 0; ix < chs->num; ix++) {
+ if (mlx5e_channels_is_xsk(chs, ix))
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+ }
+ res->rss_nch = chs->num;
+
+ mlx5e_rx_res_rss_enable(res);
+
+ for (ix = 0; ix < nch; ix++)
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+ for (ix = nch; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ u32 rqn;
+
+ if (!mlx5e_channels_get_ptp_rqn(chs, &rqn))
+ rqn = res->drop_rqn;
+
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ rqn, err);
+ }
+}
+
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+ int err;
+
+ mlx5e_rx_res_rss_disable(res);
+
+ for (ix = 0; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ res->drop_rqn, err);
+ }
+}
+
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk)
+{
+ if (xsk)
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+
+ mlx5e_rx_res_rss_enable(res);
+
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+}
+
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ struct mlx5e_tir_builder *builder;
+ int err, final_err;
+ unsigned int ix;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ down_write(&res->pkt_merge_param_sem);
+ res->pkt_merge_param = *pkt_merge_param;
+
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+ for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) {
+ struct mlx5e_rss *rss = res->rss[ix];
+
+ if (!rss)
+ continue;
+
+ err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
+ if (err)
+ final_err = final_err ? : err;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
+ mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ up_write(&res->pkt_merge_param_sem);
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+ return mlx5e_rss_get_hash(res->rss[0]);
+}
+
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+ struct mlx5e_tir *tir)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ u32 rqtn;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq);
+
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn,
+ inner_ft_support);
+ mlx5e_tir_builder_build_direct(builder);
+ mlx5e_tir_builder_build_tls(builder);
+ down_read(&res->pkt_merge_param_sem);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
+ err = mlx5e_tir_init(tir, builder, res->mdev, false);
+ up_read(&res->pkt_merge_param_sem);
+
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
new file mode 100644
index 000000000..5d5f64fab
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RX_RES_H__
+#define __MLX5_EN_RX_RES_H__
+
+#include <linux/kernel.h>
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+#include "rss.h"
+
+struct mlx5e_rx_res;
+
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
+
+enum mlx5e_rx_res_features {
+ MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
+ MLX5E_RX_RES_FEATURE_PTP = BIT(1),
+};
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk);
+
+/* Configuration API */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ const u32 *indir, const u8 *key, const u8 *hfunc);
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss);
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
+
+/* Accel TIRs */
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+ struct mlx5e_tir *tir);
+#endif /* __MLX5_EN_RX_RES_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
new file mode 100644
index 000000000..f675b1926
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "selq.h"
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include "en.h"
+#include "en/ptp.h"
+#include "en/htb.h"
+
+struct mlx5e_selq_params {
+ unsigned int num_regular_queues;
+ unsigned int num_channels;
+ unsigned int num_tcs;
+ union {
+ u8 is_special_queues;
+ struct {
+ bool is_htb : 1;
+ bool is_ptp : 1;
+ };
+ };
+ u16 htb_maj_id;
+ u16 htb_defcls;
+};
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
+{
+ struct mlx5e_selq_params *init_params;
+
+ selq->state_lock = state_lock;
+
+ selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL);
+ if (!selq->standby)
+ return -ENOMEM;
+
+ init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL);
+ if (!init_params) {
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ return -ENOMEM;
+ }
+ /* Assign dummy values, so that mlx5e_select_queue won't crash. */
+ *init_params = (struct mlx5e_selq_params) {
+ .num_regular_queues = 1,
+ .num_channels = 1,
+ .num_tcs = 1,
+ .is_htb = false,
+ .is_ptp = false,
+ .htb_maj_id = 0,
+ .htb_defcls = 0,
+ };
+ rcu_assign_pointer(selq->active, init_params);
+
+ return 0;
+}
+
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+{
+ WARN_ON_ONCE(selq->is_prepared);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ selq->is_prepared = true;
+
+ mlx5e_selq_apply(selq);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+}
+
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->num_channels = params->num_channels;
+ selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params);
+ selq->standby->num_regular_queues =
+ selq->standby->num_channels * selq->standby->num_tcs;
+ selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS);
+}
+
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *selq_active =
+ rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock));
+
+ return selq_active->htb_maj_id;
+}
+
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->is_htb = htb_maj_id;
+ selq->standby->htb_maj_id = htb_maj_id;
+ selq->standby->htb_defcls = htb_defcls;
+}
+
+void mlx5e_selq_apply(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *old_params;
+
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+
+ old_params = rcu_replace_pointer(selq->active, selq->standby,
+ lockdep_is_held(selq->state_lock));
+ synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */
+ selq->standby = old_params;
+}
+
+void mlx5e_selq_cancel(struct mlx5e_selq *selq)
+{
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+}
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP)
+ return mlx5e_get_dscp_up(priv, skb);
+#endif
+ if (skb_vlan_tag_present(skb))
+ return skb_vlan_tag_get_prio(skb);
+ return 0;
+}
+
+static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int up;
+
+ up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0;
+
+ return selq->num_regular_queues + up;
+}
+
+static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ u16 classid;
+
+ /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
+ if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id)
+ classid = TC_H_MIN(skb->priority);
+ else
+ classid = selq->htb_defcls;
+
+ if (!classid)
+ return 0;
+
+ return mlx5e_htb_get_txq_by_classid(priv->htb, classid);
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_selq_params *selq;
+ int txq_ix, up;
+
+ selq = rcu_dereference_bh(priv->selq.active);
+
+ /* This is a workaround needed only for the mlx5e_netdev_change_profile
+ * flow that zeroes out the whole priv without unregistering the netdev
+ * and without preventing ndo_select_queue from being called.
+ */
+ if (unlikely(!selq))
+ return 0;
+
+ if (likely(!selq->is_special_queues)) {
+ /* No special queues, netdev_pick_tx returns one of the regular ones. */
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ /* Normalize any picked txq_ix to [0, num_channels),
+ * So we can return a txq_ix that matches the channel and
+ * packet UP.
+ */
+ return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
+ up * selq->num_channels;
+ }
+
+ if (unlikely(selq->htb_maj_id)) {
+ /* num_tcs == 1, shortcut for PTP */
+
+ txq_ix = mlx5e_select_htb_queue(priv, skb, selq);
+ if (txq_ix > 0)
+ return txq_ix;
+
+ if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb)))
+ return selq->num_channels;
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
+ * If they are selected, switch to regular queues.
+ * Driver to select these queues only at mlx5e_select_ptpsq()
+ * and mlx5e_select_htb_queue().
+ */
+ return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
+ }
+
+ /* PTP is enabled */
+
+ if (mlx5e_use_ptpsq(skb))
+ return mlx5e_select_ptpsq(dev, skb, selq);
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Normalize any picked txq_ix to [0, num_channels). Queues in range
+ * [0, num_regular_queues) will be mapped to the corresponding channel
+ * index, so that we can apply the packet's UP (if num_tcs > 1).
+ * If netdev_pick_tx() picks ptp_channel, switch to a regular queue,
+ * because driver should select the PTP only at mlx5e_select_ptpsq().
+ */
+ txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ return txq_ix + up * selq->num_channels;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
new file mode 100644
index 000000000..fd590f80e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_SELQ_H__
+#define __MLX5_EN_SELQ_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_selq_params;
+
+struct mlx5e_selq {
+ struct mlx5e_selq_params __rcu *active;
+ struct mlx5e_selq_params *standby;
+ struct mutex *state_lock; /* points to priv->state_lock */
+ bool is_prepared;
+};
+
+struct mlx5e_params;
+struct net_device;
+struct sk_buff;
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock);
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq);
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params);
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls);
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq);
+void mlx5e_selq_apply(struct mlx5e_selq *selq);
+void mlx5e_selq_cancel(struct mlx5e_selq *selq);
+
+static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
+{
+ while (unlikely(txq >= num_channels))
+ txq -= num_channels;
+ return txq;
+}
+
+static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
+{
+ if (unlikely(txq >= num_channels)) {
+ if (unlikely(txq >= num_channels << 3))
+ txq %= num_channels;
+ else
+ do
+ txq -= num_channels;
+ while (txq >= num_channels);
+ }
+ return txq;
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+
+#endif /* __MLX5_EN_SELQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
new file mode 100644
index 000000000..21aab9635
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_ACCEPT;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_accept = {
+ .can_offload = tc_act_can_offload_accept,
+ .parse_action = tc_act_parse_accept,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
new file mode 100644
index 000000000..3337241cf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc/post_act.h"
+#include "en/tc_priv.h"
+#include "mlx5_core.h"
+
+static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress,
+ [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle,
+ [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap,
+ [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype,
+ [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample,
+ [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+ [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push,
+ [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop,
+ [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan,
+};
+
+static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+};
+
+/**
+ * mlx5e_tc_act_get() - Get an action parser for an action id.
+ * @act_id: Flow action id.
+ * @ns_type: flow namespace type.
+ */
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5e_tc_act **tc_acts;
+
+ tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic;
+
+ return tc_acts[act_id];
+}
+
+/**
+ * mlx5e_tc_act_init_parse_state() - Init a new parse_state.
+ * @parse_state: Parsing state.
+ * @flow: mlx5e tc flow being handled.
+ * @flow_action: flow action to parse.
+ * @extack: to set an error msg.
+ *
+ * The same parse_state should be passed to action parsers
+ * for tracking the current parsing state.
+ */
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ memset(parse_state, 0, sizeof(*parse_state));
+ parse_state->flow = flow;
+ parse_state->extack = extack;
+ parse_state->flow_action = flow_action;
+}
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder)
+{
+ struct flow_action_entry *act;
+ int i, j = 0;
+
+ flow_action_for_each(i, act, flow_action) {
+ /* Add CT action to be first. */
+ if (act->id == FLOW_ACTION_CT)
+ flow_action_reorder->entries[j++] = act;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT)
+ continue;
+ flow_action_reorder->entries[j++] = act;
+ }
+}
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct flow_action_entry *act;
+ struct mlx5e_tc_act *tc_act;
+ struct mlx5e_priv *priv;
+ int err = 0, i;
+
+ priv = parse_state->flow->priv;
+
+ flow_action_for_each(i, act, flow_action) {
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act || !tc_act->post_parse)
+ continue;
+
+ err = tc_act->post_parse(parse_state, priv, attr);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr)
+{
+ struct mlx5_core_dev *mdev = flow->priv->mdev;
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ int err;
+
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+
+ /* Set handle on current post act rule to next post act rule. */
+ err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts);
+ if (err) {
+ mlx5_core_warn(mdev, "Failed setting post action handle");
+ return err;
+ }
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
new file mode 100644
index 000000000..e1570ff05
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_H__
+#define __MLX5_EN_TC_ACT_H__
+
+#include <net/tc_act/tc_pedit.h>
+#include <net/flow_offload.h>
+#include <linux/netlink.h>
+#include "eswitch.h"
+#include "pedit.h"
+
+struct mlx5_flow_attr;
+
+struct mlx5e_tc_act_parse_state {
+ struct flow_action *flow_action;
+ struct mlx5e_tc_flow *flow;
+ struct netlink_ext_ack *extack;
+ u32 actions;
+ bool ct;
+ bool ct_clear;
+ bool encap;
+ bool decap;
+ bool mpls_push;
+ bool eth_push;
+ bool eth_pop;
+ bool ptype_host;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
+ int if_count;
+ struct mlx5_tc_ct_priv *ct_priv;
+};
+
+struct mlx5e_tc_act {
+ bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr);
+
+ int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ bool (*is_multi_table_act)(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr);
+
+ int (*offload_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act);
+
+ int (*destroy_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+
+ int (*stats_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+};
+
+struct mlx5e_tc_flow_action {
+ unsigned int num_entries;
+ struct flow_action_entry **entries;
+};
+
+extern struct mlx5e_tc_act mlx5e_tc_act_drop;
+extern struct mlx5e_tc_act mlx5e_tc_act_trap;
+extern struct mlx5e_tc_act mlx5e_tc_act_accept;
+extern struct mlx5e_tc_act mlx5e_tc_act_mark;
+extern struct mlx5e_tc_act mlx5e_tc_act_goto;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap;
+extern struct mlx5e_tc_act mlx5e_tc_act_csum;
+extern struct mlx5e_tc_act mlx5e_tc_act_pedit;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic;
+extern struct mlx5e_tc_act mlx5e_tc_act_ct;
+extern struct mlx5e_tc_act mlx5e_tc_act_sample;
+extern struct mlx5e_tc_act mlx5e_tc_act_ptype;
+extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress;
+extern struct mlx5e_tc_act mlx5e_tc_act_police;
+
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack);
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder);
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type);
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr);
+
+#endif /* __MLX5_EN_TC_ACT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
new file mode 100644
index 000000000..c0f08ae6a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/tc_act/tc_csum.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+csum_offload_supported(struct mlx5e_priv *priv,
+ u32 action,
+ u32 update_flags,
+ struct netlink_ext_ack *extack)
+{
+ u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+ TCA_CSUM_UPDATE_FLAG_UDP;
+
+ /* The HW recalcs checksums only if re-writing headers */
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "TC csum action is only offloaded with pedit");
+ netdev_warn(priv->netdev,
+ "TC csum action is only offloaded with pedit\n");
+ return false;
+ }
+
+ if (update_flags & ~prot_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload TC csum action for some header/s");
+ netdev_warn(priv->netdev,
+ "can't offload TC csum action for some header/s - flags %#x\n",
+ update_flags);
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ return csum_offload_supported(flow->priv, attr->action,
+ act->csum_flags, parse_state->extack);
+}
+
+static int
+tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_csum = {
+ .can_offload = tc_act_can_offload_csum,
+ .parse_action = tc_act_parse_csum,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
new file mode 100644
index 000000000..a829c9428
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+
+static bool
+tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->ct && !clear_action) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ int err;
+
+ /* It's redundant to do ct clear more than once. */
+ if (clear_action && parse_state->ct_clear)
+ return 0;
+
+ err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
+ &attr->parse_attr->mod_hdr_acts,
+ act, parse_state->extack);
+ if (err)
+ return err;
+
+
+ if (mlx5e_is_eswitch_flow(parse_state->flow))
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ if (clear_action) {
+ parse_state->ct_clear = true;
+ } else {
+ attr->flags |= MLX5_ATTR_FLAG_CT;
+ flow_flag_set(parse_state->flow, CT);
+ parse_state->ct = true;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
+ int err;
+
+ /* If ct action exist, we can ignore previous ct_clear actions */
+ if (parse_state->ct)
+ return 0;
+
+ if (parse_state->ct_clear) {
+ err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Failed to set registers for ct clear");
+ return err;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Prevent handling of additional, redundant clear actions */
+ parse_state->ct_clear = false;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->ct.action & TCA_CT_ACT_CLEAR)
+ return false;
+
+ return true;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ct = {
+ .can_offload = tc_act_can_offload_ct,
+ .parse_action = tc_act_parse_ct,
+ .is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .post_parse = tc_act_post_parse_ct,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
new file mode 100644
index 000000000..dd025a95c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_drop = {
+ .can_offload = tc_act_can_offload_drop,
+ .parse_action = tc_act_parse_drop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
new file mode 100644
index 000000000..25174f686
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "eswitch.h"
+
+static int
+validate_goto_chain(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
+ bool ft_flow = mlx5e_is_ft_flow(flow);
+ u32 dest_chain = act->chain_index;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_eswitch *esw;
+ u32 reformat_and_fwd;
+ u32 max_chain;
+
+ esw = priv->mdev->priv.eswitch;
+ chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc);
+ max_chain = mlx5_chains_get_chain_range(chains);
+ reformat_and_fwd = is_esw ?
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
+
+ if (ft_flow) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_chains_backwards_supported(chains) &&
+ dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+
+ if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ !reformat_and_fwd) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Goto chain is not allowed if action has reformat or decap");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (validate_goto_chain(flow->priv, flow, attr, act, extack))
+ return false;
+
+ return true;
+}
+
+static int
+tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->dest_chain = act->chain_index;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (!attr->dest_chain)
+ return 0;
+
+ if (parse_state->decap) {
+ /* It can be supported if we'll create a mapping for
+ * the tunnel device only (without tunnel), and set
+ * this tunnel id with this decap flow.
+ *
+ * On restore (miss), we'll just set this saved tunnel
+ * device.
+ */
+
+ NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported");
+ netdev_warn(priv->netdev, "Decap with goto isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_goto = {
+ .can_offload = tc_act_can_offload_goto,
+ .parse_action = tc_act_parse_goto,
+ .post_parse = tc_act_post_parse_goto,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
new file mode 100644
index 000000000..e8d227595
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en_tc.h"
+
+static bool
+tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->nic_attr->flow_tag = act->mark;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mark = {
+ .can_offload = tc_act_can_offload_mark,
+ .parse_action = tc_act_parse_mark,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
new file mode 100644
index 000000000..4ac7de3f6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_macvlan.h>
+#include <linux/if_vlan.h>
+#include <net/bareudp.h>
+#include <net/bonding.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+#include "en_rep.h"
+#include "lag/lag.h"
+
+static bool
+same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ return mlx5e_eswitch_vf_rep(priv->netdev) &&
+ priv->netdev == out_dev;
+}
+
+static int
+verify_uplink_forwarding(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rep_priv;
+
+ /* Forwarding non encapsulated traffic between
+ * uplink ports is allowed only if
+ * termination_table_raw_traffic cap is set.
+ *
+ * Input vport was stored attr->in_rep.
+ * In LAG case, *priv* is the private data of
+ * uplink which may be not the input vport.
+ */
+ rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep);
+
+ if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+ mlx5e_eswitch_uplink_rep(out_dev)))
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+ termination_table_raw_traffic)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are both uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ } else if (out_dev != rep_priv->netdev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not the same uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static bool
+is_duplicated_output_device(struct net_device *dev,
+ struct net_device *out_dev,
+ int *ifindexes, int if_count,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < if_count; i++) {
+ if (ifindexes[i] == out_dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device");
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
+ out_dev->name);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct net_device *
+get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
+{
+ struct net_device *fdb_out_dev = out_dev;
+ struct net_device *uplink_upper;
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev) {
+ fdb_out_dev = uplink_dev;
+ } else if (netif_is_lag_master(out_dev)) {
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
+ if (fdb_out_dev &&
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
+ fdb_out_dev = NULL;
+ }
+ rcu_read_unlock();
+ return fdb_out_dev;
+}
+
+static bool
+tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev) {
+ /* out_dev is NULL when filters with
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return false;
+ }
+
+ if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device");
+ return false;
+ }
+
+ if (parse_state->eth_pop && !parse_state->mpls_push) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push");
+ return false;
+ }
+
+ if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push");
+ return false;
+ }
+
+ if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) {
+ /* Ignore forward to self rules generated
+ * by adding both mlx5 devs to the flow table
+ * block on a normal nft offload setup.
+ */
+ return false;
+ }
+
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't support more output ports, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "can't support more than %d output ports, can't offload forwarding\n",
+ esw_attr->out_count);
+ return false;
+ }
+
+ if (parse_state->encap ||
+ netdev_port_same_parent_id(priv->netdev, out_dev) ||
+ netif_is_ovs_master(out_dev))
+ return true;
+
+ if (parse_attr->filter_dev != priv->netdev) {
+ /* All mlx5 devices are called to configure
+ * high level device filters. Therefore, the
+ * *attempt* to install a filter on invalid
+ * eswitch should not trigger an explicit error
+ */
+ return false;
+ }
+
+ NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
+
+ return false;
+}
+
+static int
+parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+
+ parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex;
+ parse_attr->tun_info[esw_attr->out_count] =
+ mlx5e_dup_tun_info(parse_state->tun_info);
+
+ if (!parse_attr->tun_info[esw_attr->out_count])
+ return -ENOMEM;
+
+ parse_state->encap = false;
+
+ if (parse_state->mpls_push) {
+ memcpy(&parse_attr->mpls_info[esw_attr->out_count],
+ &parse_state->mpls_info, sizeof(parse_state->mpls_info));
+ parse_state->mpls_push = false;
+ }
+ esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP;
+ esw_attr->out_count++;
+ /* attr->dests[].rep is resolved when we handle encap */
+
+ return 0;
+}
+
+static int
+parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct net_device *out_dev = act->dev;
+ struct net_device *uplink_dev;
+ struct mlx5e_priv *out_priv;
+ struct mlx5_eswitch *esw;
+ bool is_uplink_rep;
+ int *ifindexes;
+ int if_count;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ ifindexes = parse_state->ifindexes;
+ if_count = parse_state->if_count;
+
+ if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack))
+ return -EOPNOTSUPP;
+
+ parse_state->ifindexes[if_count] = out_dev->ifindex;
+ parse_state->if_count++;
+ is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
+ err = mlx5_lag_do_mirred(priv->mdev, out_dev);
+ if (err)
+ return err;
+
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
+ if (!out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(out_dev)) {
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack);
+ if (err)
+ return err;
+ }
+
+ if (is_vlan_dev(parse_attr->filter_dev)) {
+ err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack);
+ if (err)
+ return err;
+ }
+
+ if (netif_is_macvlan(out_dev))
+ out_dev = macvlan_dev_real_dev(out_dev);
+
+ err = verify_uplink_forwarding(priv, attr, out_dev, extack);
+ if (err)
+ return err;
+
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+
+ if (same_vf_reps(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself");
+ return -EOPNOTSUPP;
+ }
+
+ out_priv = netdev_priv(out_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
+
+ /* If output device is bond master then rules are not explicit
+ * so we don't attempt to count them.
+ */
+ if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ attr->lag.count = true;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+static int
+parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+ return 0;
+}
+
+static int
+tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct net_device *out_dev = act->dev;
+ int err = -EOPNOTSUPP;
+
+ if (parse_state->encap)
+ err = parse_mirred_encap(parse_state, act, attr);
+ else if (netdev_port_same_parent_id(priv->netdev, out_dev))
+ err = parse_mirred(parse_state, act, priv, attr);
+ else if (netif_is_ovs_master(out_dev))
+ err = parse_mirred_ovs_master(parse_state, act, priv, attr);
+
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred = {
+ .can_offload = tc_act_can_offload_mirred,
+ .parse_action = tc_act_parse_mirred,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
new file mode 100644
index 000000000..90b4c1b34
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+
+ if (act->id != FLOW_ACTION_REDIRECT)
+ return false;
+
+ if (priv->netdev->netdev_ops != out_dev->netdev_ops ||
+ !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "devices %s %s not on same switch HW, can't offload forwarding\n",
+ netdev_name(priv->netdev),
+ out_dev->name);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex;
+ flow_flag_set(parse_state->flow, HAIRPIN);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = {
+ .can_offload = tc_act_can_offload_mirred_nic,
+ .parse_action = tc_act_parse_mirred_nic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
new file mode 100644
index 000000000..f106190bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/bareudp.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_priv *priv = parse_state->flow->priv;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) ||
+ act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol");
+ return false;
+ }
+
+ return true;
+}
+
+static void
+copy_mpls_info(struct mlx5e_mpls_info *mpls_info,
+ const struct flow_action_entry *act)
+{
+ mpls_info->label = act->mpls_push.label;
+ mpls_info->tc = act->mpls_push.tc;
+ mpls_info->bos = act->mpls_push.bos;
+ mpls_info->ttl = act->mpls_push.ttl;
+}
+
+static int
+tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->mpls_push = true;
+ copy_mpls_info(&parse_state->mpls_info, act);
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct net_device *filter_dev;
+
+ filter_dev = attr->parse_attr->filter_dev;
+
+ /* we only support mpls pop if it is the first action
+ * or it is second action after tunnel key unset
+ * and the filter net device is bareudp. Subsequent
+ * actions can be pedit and the last can be mirred
+ * egress redirect.
+ */
+ if ((act_index == 1 && !parse_state->decap) || act_index > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap");
+ return false;
+ }
+
+ if (!netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->esw_attr->eth.h_proto = act->mpls_pop.proto;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_flag_set(parse_state->flow, L3_TO_L2_DECAP);
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_push = {
+ .can_offload = tc_act_can_offload_mpls_push,
+ .parse_action = tc_act_parse_mpls_push,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = {
+ .can_offload = tc_act_can_offload_mpls_pop,
+ .parse_action = tc_act_parse_mpls_pop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
new file mode 100644
index 000000000..47597c524
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "pedit.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+
+static int pedit_header_offsets[] = {
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int
+set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u32 *curr_pmask, *curr_pval;
+
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
+
+ if (*curr_pmask & mask) { /* disallow acting twice on the same location */
+ NL_SET_ERR_MSG_MOD(extack,
+ "curr_pmask and new mask same. Acting twice on same location");
+ goto out_err;
+ }
+
+ *curr_pmask |= mask;
+ *curr_pval |= (val & mask);
+
+ return 0;
+
+out_err:
+ return -EOPNOTSUPP;
+}
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
+ u8 htype = act->mangle.htype;
+ int err = -EOPNOTSUPP;
+ u32 mask, val, offset;
+
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
+ goto out_err;
+ }
+
+ if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) {
+ NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported");
+ goto out_err;
+ }
+
+ mask = act->mangle.mask;
+ val = act->mangle.val;
+ offset = act->mangle.offset;
+
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack);
+ if (err)
+ goto out_err;
+
+ hdrs[cmd].pedits++;
+
+ return 0;
+out_err:
+ return err;
+}
+
+static bool
+tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(flow);
+
+ err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs,
+ parse_state->extack);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_pedit = {
+ .can_offload = tc_act_can_offload_pedit,
+ .parse_action = tc_act_parse_pedit,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
new file mode 100644
index 000000000..434c8bd71
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_PEDIT_H__
+#define __MLX5_EN_TC_ACT_PEDIT_H__
+
+#include "en_tc.h"
+
+struct pedit_headers {
+ struct ethhdr eth;
+ struct vlan_hdr vlan;
+ struct iphdr ip4;
+ struct ipv6hdr ip6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+};
+
+struct pedit_headers_action {
+ struct pedit_headers vals;
+ struct pedit_headers masks;
+ u32 pedits;
+};
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
new file mode 100644
index 000000000..c8e5ca65b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Offload not supported when conform action is not pipe or ok");
+ return false;
+ }
+ if (mlx5e_policer_validate(parse_state->flow_action, act,
+ parse_state->extack))
+ return false;
+
+ return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev);
+}
+
+static int
+fill_meter_params_from_act(const struct flow_action_entry *act,
+ struct mlx5e_flow_meter_params *params)
+{
+ params->index = act->hw_index;
+ if (act->police.rate_bytes_ps) {
+ params->mode = MLX5_RATE_LIMIT_BPS;
+ /* change rate to bits per second */
+ params->rate = act->police.rate_bytes_ps << 3;
+ params->burst = act->police.burst;
+ } else if (act->police.rate_pkt_ps) {
+ params->mode = MLX5_RATE_LIMIT_PPS;
+ params->rate = act->police.rate_pkt_ps;
+ params->burst = act->police.burst_pkt;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ int err;
+
+ err = fill_meter_params_from_act(act, &attr->meter_attr.params);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
+ attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_police(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_police_offload(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ int err = 0;
+
+ err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ if (err)
+ return err;
+
+ err = fill_meter_params_from_act(act, &params);
+ if (err)
+ return err;
+
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) {
+ meter = mlx5e_tc_meter_replace(priv->mdev, &params);
+ } else if (!IS_ERR(meter)) {
+ err = mlx5e_tc_meter_update(meter, &params);
+ mlx5e_tc_meter_put(meter);
+ }
+
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ err = PTR_ERR(meter);
+ }
+
+ return err;
+}
+
+static int
+tc_act_police_destroy(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+ /* first put for the get and second for cleanup */
+ mlx5e_tc_meter_put(meter);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+static int
+tc_act_police_stats(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ u64 bytes, packets, drops, lastuse;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+
+ mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse);
+ flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_police = {
+ .can_offload = tc_act_can_offload_police,
+ .parse_action = tc_act_parse_police,
+ .is_multi_table_act = tc_act_is_multi_table_act_police,
+ .offload_action = tc_act_police_offload,
+ .destroy_action = tc_act_police_destroy,
+ .stats_action = tc_act_police_stats,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
new file mode 100644
index 000000000..6454b031f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (act->ptype != PACKET_HOST) {
+ NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host");
+ return -EOPNOTSUPP;
+ }
+
+ parse_state->ptype_host = true;
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ptype = {
+ .can_offload = tc_act_can_offload_ptype,
+ .parse_action = tc_act_parse_ptype,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
new file mode 100644
index 000000000..ad09a8a5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev)
+ return false;
+
+ if (!netif_is_ovs_master(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is supported only for OVS internal ports");
+ return false;
+ }
+
+ if (netif_is_ovs_master(parse_attr->filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is not supported from internal port");
+ return false;
+ }
+
+ if (!parse_state->ptype_host) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress requires ptype=host action");
+ return false;
+ }
+
+ if (esw_attr->out_count) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress is supported only as single destination");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = {
+ .can_offload = tc_act_can_offload_redirect_ingress,
+ .parse_action = tc_act_parse_redirect_ingress,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
new file mode 100644
index 000000000..2c0196431
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/psample.h>
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc/act/sample.h"
+
+static bool
+tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ bool ct_nat;
+
+ ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+
+ if (flow_flag_test(parse_state->flow, CT) && ct_nat) {
+ NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_attr *sample_attr = &attr->sample_attr;
+
+ sample_attr->rate = act->sample.rate;
+ sample_attr->group_num = act->sample.psample_group->group_num;
+
+ if (act->sample.truncate)
+ sample_attr->trunc_size = act->sample.trunc_size;
+
+ attr->flags |= MLX5_ATTR_FLAG_SAMPLE;
+ flow_flag_set(parse_state->flow, SAMPLE);
+
+ return 0;
+}
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr)
+{
+ if (MLX5_CAP_GEN(mdev, reg_c_preserve) ||
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return true;
+
+ return false;
+}
+
+static bool
+tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr);
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_sample = {
+ .can_offload = tc_act_can_offload_sample,
+ .parse_action = tc_act_parse_sample,
+ .is_multi_table_act = tc_act_is_multi_table_act_sample,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
new file mode 100644
index 000000000..3efb3a15c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__
+#define __MLX5_EN_TC_ACT_SAMPLE_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr);
+
+#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
new file mode 100644
index 000000000..53b270f65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->flow_action->num_entries != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_trap = {
+ .can_offload = tc_act_can_offload_trap,
+ .parse_action = tc_act_parse_trap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
new file mode 100644
index 000000000..b4fa2de97
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (!act->tunnel) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Zero tunnel attributes is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->tun_info = act->tunnel;
+ parse_state->encap = true;
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->decap = true;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_encap = {
+ .can_offload = tc_act_can_offload_tun_encap,
+ .parse_action = tc_act_parse_tun_encap,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_decap = {
+ .can_offload = tc_act_can_offload_tun_decap,
+ .parse_action = tc_act_parse_tun_decap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
new file mode 100644
index 000000000..b86ac604d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+static int
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ const struct flow_action_entry prio_tag_act = {
+ .vlan.vid = 0,
+ .vlan.prio =
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_value(*action,
+ &parse_attr->spec),
+ first_prio) &
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_criteria(*action,
+ &parse_attr->spec),
+ first_prio),
+ };
+
+ return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+ &prio_tag_act, parse_attr, action,
+ extack);
+}
+
+static int
+parse_tc_vlan_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action,
+ struct netlink_ext_ack *extack,
+ struct mlx5e_tc_act_parse_state *parse_state)
+{
+ u8 vlan_idx = attr->total_vlan;
+
+ if (vlan_idx >= MLX5_FS_VLAN_DEPTH) {
+ NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported");
+ return -EOPNOTSUPP;
+ }
+
+ switch (act->id) {
+ case FLOW_ACTION_VLAN_POP:
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+ } else {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ }
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
+ if (!attr->vlan_proto[vlan_idx])
+ attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vlan push action is not supported for vlan depth > 1");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ } else {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
+ act->vlan.prio)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ }
+ break;
+ case FLOW_ACTION_VLAN_POP_ETH:
+ parse_state->eth_pop = true;
+ break;
+ case FLOW_ACTION_VLAN_PUSH_ETH:
+ if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP))
+ return -EOPNOTSUPP;
+ parse_state->eth_push = true;
+ memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN);
+ memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN");
+ return -EINVAL;
+ }
+
+ attr->total_vlan = vlan_idx + 1;
+
+ return 0;
+}
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *vlan_dev = *out_dev;
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_PUSH,
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+ .vlan.prio = 0,
+ };
+ int err;
+
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL);
+ if (err)
+ return err;
+
+ rcu_read_lock();
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
+ rcu_read_unlock();
+ if (!*out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(*out_dev))
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack);
+
+ return err;
+}
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_POP,
+ };
+ int nest_level, err = 0;
+
+ nest_level = attr->parse_attr->filter_dev->lower_level -
+ priv->netdev->lower_level;
+ while (nest_level--) {
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action,
+ extack, NULL);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int err;
+
+ if (act->id == FLOW_ACTION_VLAN_PUSH &&
+ (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
+ /* Replace vlan pop+push with vlan modify */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act,
+ attr->parse_attr, &attr->action,
+ parse_state->extack);
+ } else {
+ err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action,
+ parse_state->extack, parse_state);
+ }
+
+ if (err)
+ return err;
+
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+ /* For prio tag mode, replace vlan pop with rewrite vlan prio
+ * tag rewrite.
+ */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr,
+ &attr->action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan = {
+ .can_offload = tc_act_can_offload_vlan,
+ .parse_action = tc_act_parse_vlan,
+ .post_parse = tc_act_post_parse_vlan,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
new file mode 100644
index 000000000..2fa58c6f4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_VLAN_H__
+#define __MLX5_EN_TC_ACT_VLAN_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
new file mode 100644
index 000000000..9a8a1a6bd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ u16 mask16 = VLAN_VID_MASK;
+ u16 val16 = act->vlan.vid & VLAN_VID_MASK;
+ const struct flow_action_entry pedit_act = {
+ .id = FLOW_ACTION_MANGLE,
+ .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
+ .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
+ .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
+ .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
+ };
+ u8 match_prio_mask, match_prio_val;
+ void *headers_c, *headers_v;
+ int err;
+
+ headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec);
+
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match");
+ return -EOPNOTSUPP;
+ }
+
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs,
+ extack);
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(parse_state->flow);
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr,
+ &attr->action, parse_state->extack);
+ if (err)
+ return err;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = {
+ .can_offload = tc_act_can_offload_vlan_mangle,
+ .parse_action = tc_act_parse_vlan_mangle,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
new file mode 100644
index 000000000..bb6b1a979
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_TC_CT_FS_H__
+#define __MLX5_EN_TC_CT_FS_H__
+
+struct mlx5_ct_fs {
+ const struct net_device *netdev;
+ struct mlx5_core_dev *dev;
+
+ /* private data */
+ void *priv_data[];
+};
+
+struct mlx5_ct_fs_rule {
+};
+
+struct mlx5_ct_fs_ops {
+ int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct);
+ void (*destroy)(struct mlx5_ct_fs *fs);
+
+ struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule);
+ void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule);
+
+ size_t priv_size;
+};
+
+static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs)
+{
+ return &fs->priv_data;
+}
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void);
+
+#if IS_ENABLED(CONFIG_MLX5_SW_STEERING)
+struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void);
+#else
+static inline struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return NULL;
+}
+#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */
+
+#endif /* __MLX5_EN_TC_CT_FS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
new file mode 100644
index 000000000..ae4f55be4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "en_tc.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args)
+
+struct mlx5_ct_fs_dmfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+};
+
+static int
+mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ return 0;
+}
+
+static void
+mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs)
+{
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5e_priv *priv = netdev_priv(fs->netdev);
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule;
+ int err;
+
+ dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL);
+ if (!dmfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
+ if (IS_ERR(dmfs_rule->rule)) {
+ err = PTR_ERR(dmfs_rule->rule);
+ ct_dbg("Failed to add ct entry fs rule");
+ goto err_insert;
+ }
+
+ dmfs_rule->attr = attr;
+
+ return &dmfs_rule->fs_rule;
+
+err_insert:
+ kfree(dmfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_dmfs_rule,
+ fs_rule);
+
+ mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr);
+ kfree(dmfs_rule);
+}
+
+static struct mlx5_ct_fs_ops dmfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_dmfs_init,
+ .destroy = mlx5_ct_fs_dmfs_destroy,
+};
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void)
+{
+ return &dmfs_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
new file mode 100644
index 000000000..2b80fe735
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/refcount.h>
+
+#include "en_tc.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#include "lib/smfs.h"
+
+#define INIT_ERR_PREFIX "ct_fs_smfs init failed"
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args)
+#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16)
+
+struct mlx5_ct_fs_smfs_matcher {
+ struct mlx5dr_matcher *dr_matcher;
+ struct list_head list;
+ int prio;
+ refcount_t ref;
+};
+
+struct mlx5_ct_fs_smfs_matchers {
+ struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
+ struct list_head used;
+};
+
+struct mlx5_ct_fs_smfs {
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl;
+ struct mlx5_ct_fs_smfs_matchers matchers;
+ struct mlx5_ct_fs_smfs_matchers matchers_nat;
+ struct mlx5dr_action *fwd_action;
+ struct mlx5_flow_table *ct_nat;
+ struct mutex lock; /* Guards matchers */
+};
+
+struct mlx5_ct_fs_smfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5dr_rule *rule;
+ struct mlx5dr_action *count_action;
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+};
+
+static inline void
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+ bool gre)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+
+ if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version)))
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ if (likely(ipv4)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6));
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6));
+ }
+
+ if (likely(tcp)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(MLX5_CT_TCP_FLAGS_MASK));
+ } else if (!gre) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
+ }
+
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK);
+}
+
+static struct mlx5dr_matcher *
+mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
+ bool tcp, bool gre, u32 priority)
+{
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
+
+ dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
+ kvfree(spec);
+ if (!dr_matcher)
+ return ERR_PTR(-EINVAL);
+
+ return dr_matcher;
+}
+
+static struct mlx5_ct_fs_smfs_matcher *
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
+ struct mlx5_ct_fs_smfs_matchers *matchers;
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5dr_table *tbl;
+ struct list_head *prev;
+ int prio;
+
+ matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
+ smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
+
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ return smfs_matcher;
+
+ mutex_lock(&fs_smfs->lock);
+
+ /* Retry with lock, as another thread might have already created the relevant matcher
+ * till we acquired the lock
+ */
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ goto out_unlock;
+
+ // Find next available priority in sorted used list
+ prio = 0;
+ prev = &matchers->used;
+ list_for_each_entry(m, &matchers->used, list) {
+ prev = &m->list;
+
+ if (m->prio == prio)
+ prio = m->prio + 1;
+ else
+ break;
+ }
+
+ tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
+ dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
+ if (IS_ERR(dr_matcher)) {
+ netdev_warn(fs->netdev,
+ "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+ nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
+
+ smfs_matcher = ERR_CAST(dr_matcher);
+ goto out_unlock;
+ }
+
+ smfs_matcher->dr_matcher = dr_matcher;
+ smfs_matcher->prio = prio;
+ list_add(&smfs_matcher->list, prev);
+ refcount_set(&smfs_matcher->ref, 1);
+
+out_unlock:
+ mutex_unlock(&fs_smfs->lock);
+ return smfs_matcher;
+}
+
+static void
+mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock))
+ return;
+
+ mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher);
+ list_del(&smfs_matcher->list);
+ mutex_unlock(&fs_smfs->lock);
+}
+
+static int
+mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl;
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct);
+ ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat);
+ ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct);
+ fs_smfs->ct_nat = ct_nat;
+
+ if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) {
+ netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables");
+ return -EOPNOTSUPP;
+ }
+
+ ct_dbg("using smfs steering");
+
+ fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl);
+ if (!fs_smfs->fwd_action) {
+ return -EINVAL;
+ }
+
+ fs_smfs->ct_tbl = ct_tbl;
+ fs_smfs->ct_nat_tbl = ct_nat_tbl;
+ mutex_init(&fs_smfs->lock);
+ INIT_LIST_HEAD(&fs_smfs->matchers.used);
+ INIT_LIST_HEAD(&fs_smfs->matchers_nat.used);
+
+ return 0;
+}
+
+static void
+mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ mlx5_smfs_action_destroy(fs_smfs->fwd_action);
+}
+
+static inline bool
+mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
+{
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+ const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+ const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+ const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
+
+ return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
+ used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
+}
+
+static bool
+mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule)
+{
+ struct flow_match_ipv4_addrs ipv4_addrs;
+ struct flow_match_ipv6_addrs ipv6_addrs;
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+ struct flow_match_ports ports;
+ struct flow_match_tcp tcp;
+
+ if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) {
+ ct_dbg("rule uses unexpected dissectors (0x%08x)",
+ flow_rule->match.dissector->used_keys);
+ return false;
+ }
+
+ flow_rule_match_basic(flow_rule, &basic);
+ flow_rule_match_control(flow_rule, &control);
+ flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
+ flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
+ if (basic.key->ip_proto != IPPROTO_GRE)
+ flow_rule_match_ports(flow_rule, &ports);
+ if (basic.key->ip_proto == IPPROTO_TCP)
+ flow_rule_match_tcp(flow_rule, &tcp);
+
+ if (basic.mask->n_proto != htons(0xFFFF) ||
+ (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
+ basic.mask->ip_proto != 0xFF ||
+ (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+ basic.key->ip_proto != IPPROTO_GRE)) {
+ ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
+ ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
+ basic.key->ip_proto, basic.mask->ip_proto);
+ return false;
+ }
+
+ if (basic.key->ip_proto != IPPROTO_GRE &&
+ (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
+ ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
+ ports.mask->src, ports.mask->dst);
+ return false;
+ }
+
+ if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) {
+ ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags);
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+ struct mlx5_ct_fs_smfs_rule *smfs_rule;
+ struct mlx5dr_action *actions[5];
+ struct mlx5dr_rule *rule;
+ int num_actions = 0, err;
+ bool nat, tcp, ipv4, gre;
+
+ if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL);
+ if (!smfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter));
+ if (!smfs_rule->count_action) {
+ err = -EINVAL;
+ goto err_count;
+ }
+
+ actions[num_actions++] = smfs_rule->count_action;
+ actions[num_actions++] = attr->modify_hdr->action.dr_action;
+ actions[num_actions++] = fs_smfs->fwd_action;
+
+ nat = (attr->ft == fs_smfs->ct_nat);
+ ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
+ tcp = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_TCP;
+ gre = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_GRE;
+
+ smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
+ if (IS_ERR(smfs_matcher)) {
+ err = PTR_ERR(smfs_matcher);
+ goto err_matcher;
+ }
+
+ rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
+ spec->flow_context.flow_source);
+ if (!rule) {
+ err = -EINVAL;
+ goto err_create;
+ }
+
+ smfs_rule->rule = rule;
+ smfs_rule->smfs_matcher = smfs_matcher;
+
+ return &smfs_rule->fs_rule;
+
+err_create:
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher);
+err_matcher:
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+err_count:
+ kfree(smfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_smfs_rule,
+ fs_rule);
+
+ mlx5_smfs_rule_destroy(smfs_rule->rule);
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher);
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+ kfree(smfs_rule);
+}
+
+static struct mlx5_ct_fs_ops fs_smfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_smfs_init,
+ .destroy = mlx5_ct_fs_smfs_destroy,
+
+ .priv_size = sizeof(struct mlx5_ct_fs_smfs),
+};
+
+struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return &fs_smfs_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
new file mode 100644
index 000000000..ca834bbcb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/fs.h>
+#include "en/mapping.h"
+#include "en/tc/int_port.h"
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_int_port {
+ enum mlx5e_tc_int_port_type type;
+ int ifindex;
+ u32 match_metadata;
+ u32 mapping;
+ struct list_head list;
+ struct mlx5_flow_handle *rx_rule;
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+};
+
+struct mlx5e_tc_int_port_priv {
+ struct mlx5_core_dev *dev;
+ struct mutex int_ports_lock; /* Protects int ports list */
+ struct list_head int_ports; /* Uses int_ports_lock */
+ u16 num_ports;
+ bool ul_rep_rx_ready; /* Set when uplink is performing teardown */
+ struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */
+};
+
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_GEN(esw->dev, reg_c_preserve);
+}
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port)
+{
+ return int_port->match_metadata;
+}
+
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+ /* For egress forwarding we can have the case
+ * where the packet came from a vport and redirected
+ * to int port or it came from the uplink, going
+ * via internal port and hairpinned back to uplink
+ * so we set the source to any port in this case.
+ */
+ return int_port->type == MLX5E_TC_INT_PORT_EGRESS ?
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT :
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
+
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+ return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
+}
+
+static struct mlx5_flow_handle *
+mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_int_port *int_port,
+ struct mlx5_flow_destination *dest)
+
+{
+ struct mlx5_flow_context *flow_context;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5e_tc_int_port_get_metadata_for_match(int_port));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ /* Overwrite flow tag with the int port metadata mapping
+ * instead of the chain mapping.
+ */
+ flow_context = &spec->flow_context;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = int_port->mapping;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+ &flow_act, dest, 1);
+ if (IS_ERR(flow_rule))
+ mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n",
+ PTR_ERR(flow_rule));
+
+ kvfree(spec);
+
+ return flow_rule;
+}
+
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ if (!priv->ul_rep_rx_ready)
+ goto not_found;
+
+ list_for_each_entry(int_port, &priv->int_ports, list)
+ if (int_port->ifindex == ifindex && int_port->type == type) {
+ refcount_inc(&int_port->refcnt);
+ return int_port;
+ }
+
+not_found:
+ return NULL;
+}
+
+static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex, enum mlx5e_tc_int_port_type type,
+ u32 *id)
+{
+ u32 mapped_key[2] = {type, ifindex};
+ int err;
+
+ err = mapping_add(priv->metadata_mapping, mapped_key, id);
+ if (err)
+ return err;
+
+ /* Fill upper 4 bits of PFNUM with reserved value */
+ *id |= 0xf << ESW_VPORT_BITS;
+
+ return 0;
+}
+
+static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv,
+ u32 id)
+{
+ id &= (1 << ESW_VPORT_BITS) - 1;
+ mapping_remove(priv->metadata_mapping, id);
+}
+
+/* Must be called with priv->int_ports_lock held */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+ struct mlx5_mapped_obj mapped_obj = {};
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_int_port *int_port;
+ struct mlx5_flow_destination dest;
+ struct mapping_ctx *ctx;
+ u32 match_metadata;
+ u32 mapping;
+ int err;
+
+ if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) {
+ mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d",
+ MLX5E_TC_MAX_INT_PORT_NUM);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ int_port = kzalloc(sizeof(*int_port), GFP_KERNEL);
+ if (!int_port)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata);
+ if (err) {
+ mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d",
+ ifindex);
+ goto err_metadata;
+ }
+
+ /* map metadata to reg_c0 object for miss handling */
+ ctx = esw->offloads.reg_c0_obj_pool;
+ mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA;
+ mapped_obj.int_port_metadata = match_metadata;
+ err = mapping_add(ctx, &mapped_obj, &mapping);
+ if (err)
+ goto err_map;
+
+ int_port->type = type;
+ int_port->ifindex = ifindex;
+ int_port->match_metadata = match_metadata;
+ int_port->mapping = mapping;
+
+ /* Create a match on internal vport metadata in vport table */
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = uplink_rpriv->root_ft;
+
+ int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest);
+ if (IS_ERR(int_port->rx_rule)) {
+ err = PTR_ERR(int_port->rx_rule);
+ mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err);
+ goto err_rx_rule;
+ }
+
+ refcount_set(&int_port->refcnt, 1);
+ list_add_rcu(&int_port->list, &priv->int_ports);
+ priv->num_ports++;
+
+ return int_port;
+
+err_rx_rule:
+ mapping_remove(ctx, int_port->mapping);
+
+err_map:
+ mlx5e_int_port_metadata_free(priv, match_metadata);
+
+err_metadata:
+ kfree(int_port);
+
+ return ERR_PTR(err);
+}
+
+/* Must be called with priv->int_ports_lock held */
+static void
+mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port)
+{
+ struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+ struct mapping_ctx *ctx;
+
+ ctx = esw->offloads.reg_c0_obj_pool;
+
+ list_del_rcu(&int_port->list);
+
+ /* The following parameters are not used by the
+ * rcu readers of this int_port object so it is
+ * safe to release them.
+ */
+ if (int_port->rx_rule)
+ mlx5_del_flow_rules(int_port->rx_rule);
+ mapping_remove(ctx, int_port->mapping);
+ mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
+ kfree_rcu(int_port);
+ priv->num_ports--;
+}
+
+/* Must be called with rcu_read_lock held */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv,
+ u32 metadata)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ list_for_each_entry_rcu(int_port, &priv->int_ports, list)
+ if (int_port->match_metadata == metadata)
+ return int_port;
+
+ return NULL;
+}
+
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type)
+{
+ struct mlx5e_tc_int_port *int_port;
+
+ if (!priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&priv->int_ports_lock);
+
+ /* Reject request if ul rep not ready */
+ if (!priv->ul_rep_rx_ready) {
+ int_port = ERR_PTR(-EOPNOTSUPP);
+ goto done;
+ }
+
+ int_port = mlx5e_int_port_lookup(priv, ifindex, type);
+ if (int_port)
+ goto done;
+
+ /* Alloc and add new int port to list */
+ int_port = mlx5e_int_port_add(priv, ifindex, type);
+
+done:
+ mutex_unlock(&priv->int_ports_lock);
+
+ return int_port;
+}
+
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port)
+{
+ if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock))
+ return;
+
+ mlx5e_int_port_remove(priv, int_port);
+ mutex_unlock(&priv->int_ports_lock);
+}
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_int_port_priv *int_port_priv;
+ u64 mapping_id;
+
+ if (!mlx5e_tc_int_port_supported(esw))
+ return NULL;
+
+ int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL);
+ if (!int_port_priv)
+ return NULL;
+
+ mapping_id = mlx5_query_nic_system_image_guid(priv->mdev);
+
+ int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT,
+ sizeof(u32) * 2,
+ (1 << ESW_VPORT_BITS) - 1, true);
+ if (IS_ERR(int_port_priv->metadata_mapping)) {
+ mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n",
+ PTR_ERR(int_port_priv->metadata_mapping));
+ goto err_mapping;
+ }
+
+ int_port_priv->dev = priv->mdev;
+ mutex_init(&int_port_priv->int_ports_lock);
+ INIT_LIST_HEAD(&int_port_priv->int_ports);
+
+ return int_port_priv;
+
+err_mapping:
+ kfree(int_port_priv);
+
+ return NULL;
+}
+
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv)
+{
+ if (!priv)
+ return;
+
+ mutex_destroy(&priv->int_ports_lock);
+ mapping_destroy(priv->metadata_mapping);
+ kfree(priv);
+}
+
+/* Int port rx rules reside in ul rep rx tables.
+ * It is possible the ul rep will go down while there are
+ * still int port rules in its rx table so proper cleanup
+ * is required to free resources.
+ */
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_tc_int_port_priv *ppriv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ ppriv = uplink_priv->int_port_priv;
+
+ if (!ppriv)
+ return;
+
+ mutex_lock(&ppriv->int_ports_lock);
+ ppriv->ul_rep_rx_ready = true;
+ mutex_unlock(&ppriv->int_ports_lock);
+}
+
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_tc_int_port_priv *ppriv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_int_port *int_port;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ ppriv = uplink_priv->int_port_priv;
+
+ if (!ppriv)
+ return;
+
+ mutex_lock(&ppriv->int_ports_lock);
+
+ ppriv->ul_rep_rx_ready = false;
+
+ list_for_each_entry(int_port, &ppriv->int_ports, list) {
+ if (!IS_ERR_OR_NULL(int_port->rx_rule))
+ mlx5_del_flow_rules(int_port->rx_rule);
+
+ int_port->rx_rule = NULL;
+ }
+
+ mutex_unlock(&ppriv->int_ports_lock);
+}
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+ struct sk_buff *skb, u32 int_vport_metadata,
+ bool *forward_tx)
+{
+ enum mlx5e_tc_int_port_type fwd_type;
+ struct mlx5e_tc_int_port *int_port;
+ struct net_device *dev;
+ int ifindex;
+
+ if (!priv)
+ return false;
+
+ rcu_read_lock();
+ int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata);
+ if (!int_port) {
+ rcu_read_unlock();
+ mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n",
+ int_vport_metadata);
+ return false;
+ }
+
+ ifindex = int_port->ifindex;
+ fwd_type = int_port->type;
+ rcu_read_unlock();
+
+ dev = dev_get_by_index(&init_net, ifindex);
+ if (!dev) {
+ mlx5_core_dbg(priv->dev,
+ "Couldn't find internal port device with ifindex: %d\n",
+ ifindex);
+ return false;
+ }
+
+ skb->skb_iif = dev->ifindex;
+ skb->dev = dev;
+
+ if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) {
+ skb->pkt_type = PACKET_HOST;
+ skb_set_redirected(skb, true);
+ *forward_tx = false;
+ } else {
+ skb_reset_network_header(skb);
+ skb_push_rcsum(skb, skb->mac_len);
+ skb_set_redirected(skb, false);
+ *forward_tx = true;
+ }
+
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
new file mode 100644
index 000000000..e72c79d30
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_INT_PORT_H__
+#define __MLX5_EN_TC_INT_PORT_H__
+
+#include "en.h"
+
+struct mlx5e_tc_int_port;
+struct mlx5e_tc_int_port_priv;
+
+enum mlx5e_tc_int_port_type {
+ MLX5E_TC_INT_PORT_INGRESS,
+ MLX5E_TC_INT_PORT_EGRESS,
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw);
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv);
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv);
+
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv);
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv);
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+ struct sk_buff *skb, u32 int_vport_metadata,
+ bool *forward_tx);
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+ int ifindex,
+ enum mlx5e_tc_int_port_type type);
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+ struct mlx5e_tc_int_port *int_port);
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port);
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port);
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline u32
+mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+ return 0;
+}
+
+static inline int
+mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+ return 0;
+}
+
+static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+ return false;
+}
+
+static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {}
+static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+#endif /* __MLX5_EN_TC_INT_PORT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
new file mode 100644
index 000000000..be74e1403
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/math64.h>
+#include "lib/aso.h"
+#include "en/tc/post_act.h"
+#include "meter.h"
+#include "en/tc_priv.h"
+
+#define MLX5_START_COLOR_SHIFT 28
+#define MLX5_METER_MODE_SHIFT 24
+#define MLX5_CBS_EXP_SHIFT 24
+#define MLX5_CBS_MAN_SHIFT 16
+#define MLX5_CIR_EXP_SHIFT 8
+
+/* cir = 8*(10^9)*cir_mantissa/(2^cir_exponent)) bits/s */
+#define MLX5_CONST_CIR 8000000000ULL
+#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e))
+#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1)
+
+/* cbs = cbs_mantissa*2^cbs_exponent */
+#define MLX5_CALC_CBS(m, e) ((m) << (e))
+#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1)
+#define MLX5_MAX_HW_CBS 0x7FFFFFFF
+
+struct mlx5e_flow_meter_aso_obj {
+ struct list_head entry;
+ int base_id;
+ int total_meters;
+
+ unsigned long meters_map[0]; /* must be at the end of this struct */
+};
+
+struct mlx5e_flow_meters {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_aso *aso;
+ struct mutex aso_lock; /* Protects aso operations */
+ int log_granularity;
+ u32 pdn;
+
+ DECLARE_HASHTABLE(hashtbl, 8);
+
+ struct mutex sync_lock; /* protect flow meter operations */
+ struct list_head partial_list;
+ struct list_head full_list;
+
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_post_act *post_act;
+};
+
+static void
+mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp)
+{
+ s64 _cir, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cir << e;
+ if ((s64)m < 0) /* overflow */
+ break;
+ m = div64_u64(m, MLX5_CONST_CIR);
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cir = MLX5_CALC_CIR(m, e);
+ _delta = cir - _cir;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+static void
+mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp)
+{
+ s64 _cbs, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cbs >> e;
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cbs = MLX5_CALC_CBS(m, e);
+ _delta = cbs - _cbs;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params)
+{
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl;
+ struct mlx5_wqe_aso_data_seg *aso_data;
+ struct mlx5e_flow_meters *flow_meters;
+ u8 cir_man, cir_exp, cbs_man, cbs_exp;
+ struct mlx5_aso_wqe *aso_wqe;
+ unsigned long expires;
+ struct mlx5_aso *aso;
+ u64 rate, burst;
+ u8 ds_cnt;
+ int err;
+
+ rate = meter_params->rate;
+ burst = meter_params->burst;
+
+ /* HW treats each packet as 128 bytes in PPS mode */
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS) {
+ rate <<= 10;
+ burst <<= 7;
+ }
+
+ if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS)
+ return -EINVAL;
+
+ /* HW has limitation of total 31 bits for cbs */
+ if (burst > MLX5_MAX_HW_CBS) {
+ mlx5_core_warn(mdev,
+ "burst(%lld) is too large, use HW allowed value(%d)\n",
+ burst, MLX5_MAX_HW_CBS);
+ burst = MLX5_MAX_HW_CBS;
+ }
+
+ mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode);
+ mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp);
+ mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n",
+ rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man);
+ mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp);
+ mlx5_core_dbg(mdev, "burst=%lld, cbs=%lld, exp=%d, man=%d\n",
+ burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man);
+
+ if (!cir_man || !cbs_man)
+ return -EINVAL;
+
+ flow_meters = meter->flow_meters;
+ aso = flow_meters->aso;
+
+ mutex_lock(&flow_meters->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(aso);
+ ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS);
+ mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER);
+
+ aso_ctrl = &aso_wqe->aso_ctrl;
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6;
+ aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE |
+ MLX5_ASO_ALWAYS_TRUE << 4;
+ aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6;
+ aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32));
+
+ aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1);
+ memset(aso_data, 0, sizeof(*aso_data));
+ aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */
+ (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT));
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS)
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT);
+ else
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT);
+
+ aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) |
+ (cbs_man << MLX5_CBS_MAN_SHIFT) |
+ (cir_exp << MLX5_CIR_EXP_SHIFT) |
+ cir_man);
+
+ mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl);
+
+ /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ err = mlx5_aso_poll_cq(aso, true);
+ if (err)
+ usleep_range(2, 10);
+ } while (err && time_is_after_jiffies(expires));
+ mutex_unlock(&flow_meters->aso_lock);
+
+ return err;
+}
+
+static int
+mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ void *obj;
+ int err;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity);
+
+ obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj);
+ MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err) {
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id);
+ }
+
+ return err;
+}
+
+static void
+mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
+{
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5_fc *counter;
+ int err, pos, total;
+ u32 id;
+
+ meter = kzalloc(sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_red_counter;
+ }
+ meter->red_counter = counter;
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_green_counter;
+ }
+ meter->green_counter = counter;
+
+ meters_obj = list_first_entry_or_null(&flow_meters->partial_list,
+ struct mlx5e_flow_meter_aso_obj,
+ entry);
+ /* 2 meters in one object */
+ total = 1 << (flow_meters->log_granularity + 1);
+ if (!meters_obj) {
+ err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create flow meter ASO object\n");
+ goto err_create;
+ }
+
+ meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total),
+ GFP_KERNEL);
+ if (!meters_obj) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ meters_obj->base_id = id;
+ meters_obj->total_meters = total;
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ pos = 0;
+ } else {
+ pos = find_first_zero_bit(meters_obj->meters_map, total);
+ if (bitmap_weight(meters_obj->meters_map, total) == total - 1) {
+ list_del(&meters_obj->entry);
+ list_add(&meters_obj->entry, &flow_meters->full_list);
+ }
+ }
+
+ bitmap_set(meters_obj->meters_map, pos, 1);
+ meter->flow_meters = flow_meters;
+ meter->meters_obj = meters_obj;
+ meter->obj_id = meters_obj->base_id + pos / 2;
+ meter->idx = pos % 2;
+
+ mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+
+ return meter;
+
+err_mem:
+ mlx5e_flow_meter_destroy_aso_obj(mdev, id);
+err_create:
+ mlx5_fc_destroy(mdev, meter->green_counter);
+err_green_counter:
+ mlx5_fc_destroy(mdev, meter->red_counter);
+err_red_counter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static void
+__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ int n, pos;
+
+ mlx5_fc_destroy(mdev, meter->green_counter);
+ mlx5_fc_destroy(mdev, meter->red_counter);
+
+ meters_obj = meter->meters_obj;
+ pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx;
+ bitmap_clear(meters_obj->meters_map, pos, 1);
+ n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters);
+ if (n == 0) {
+ list_del(&meters_obj->entry);
+ mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id);
+ kfree(meters_obj);
+ } else if (n == meters_obj->total_meters - 1) {
+ list_del(&meters_obj->entry);
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ }
+
+ mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+ kfree(meter);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index)
+ if (meter->params.index == index)
+ goto add_ref;
+
+ return ERR_PTR(-ENOENT);
+
+add_ref:
+ meter->refcnt++;
+
+ return meter;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ mutex_unlock(&flow_meters->sync_lock);
+
+ return meter;
+}
+
+static void
+__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ if (--meter->refcnt == 0) {
+ hash_del(&meter->hlist);
+ __mlx5e_flow_meter_free(meter);
+ }
+}
+
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+
+ mutex_lock(&flow_meters->sync_lock);
+ __mlx5e_tc_meter_put(meter);
+ mutex_unlock(&flow_meters->sync_lock);
+}
+
+static struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = __mlx5e_flow_meter_alloc(flow_meters);
+ if (IS_ERR(meter))
+ return meter;
+
+ hash_add(flow_meters->hashtbl, &meter->hlist, params->index);
+ meter->params.index = params->index;
+ meter->refcnt++;
+
+ return meter;
+}
+
+static int
+__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ int err = 0;
+
+ if (meter->params.mode != params->mode || meter->params.rate != params->rate ||
+ meter->params.burst != params->burst) {
+ err = mlx5e_tc_meter_modify(mdev, meter, params);
+ if (err)
+ goto out;
+
+ meter->params.mode = params->mode;
+ meter->params.rate = params->rate;
+ meter->params.burst = params->burst;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&flow_meters->sync_lock);
+ err = __mlx5e_tc_meter_update(meter, params);
+ mutex_unlock(&flow_meters->sync_lock);
+ return err;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ if (IS_ERR(meter)) {
+ meter = mlx5e_tc_meter_alloc(flow_meters, params);
+ if (IS_ERR(meter)) {
+ err = PTR_ERR(meter);
+ goto err_get;
+ }
+ }
+
+ err = __mlx5e_tc_meter_update(meter, params);
+ if (err)
+ goto err_update;
+
+ mutex_unlock(&flow_meters->sync_lock);
+ return meter;
+
+err_update:
+ __mlx5e_tc_meter_put(meter);
+err_get:
+ mutex_unlock(&flow_meters->sync_lock);
+ return ERR_PTR(err);
+}
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters)
+{
+ return flow_meters->ns_type;
+}
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (IS_ERR_OR_NULL(post_act)) {
+ netdev_dbg(priv->netdev,
+ "flow meter offload is not supported, post action is missing\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL);
+ if (!flow_meters)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err);
+ goto err_out;
+ }
+
+ flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn);
+ if (IS_ERR(flow_meters->aso)) {
+ mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n");
+ err = PTR_ERR(flow_meters->aso);
+ goto err_sq;
+ }
+
+ mutex_init(&flow_meters->sync_lock);
+ INIT_LIST_HEAD(&flow_meters->partial_list);
+ INIT_LIST_HEAD(&flow_meters->full_list);
+
+ flow_meters->ns_type = ns_type;
+ flow_meters->mdev = mdev;
+ flow_meters->post_act = post_act;
+ mutex_init(&flow_meters->aso_lock);
+ flow_meters->log_granularity = min_t(int, 6,
+ MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc));
+
+ return flow_meters;
+
+err_sq:
+ mlx5_core_dealloc_pd(mdev, flow_meters->pdn);
+err_out:
+ kfree(flow_meters);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters)
+{
+ if (IS_ERR_OR_NULL(flow_meters))
+ return;
+
+ mlx5_aso_destroy(flow_meters->aso);
+ mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn);
+ kfree(flow_meters);
+}
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse)
+{
+ u64 bytes1, packets1, lastuse1;
+ u64 bytes2, packets2, lastuse2;
+
+ mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1);
+ mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2);
+
+ *bytes = bytes1 + bytes2;
+ *packets = packets1 + packets2;
+ *drops = packets2;
+ *lastuse = max_t(u64, lastuse1, lastuse2);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
new file mode 100644
index 000000000..6de6e8a16
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_FLOW_METER_H__
+#define __MLX5_EN_FLOW_METER_H__
+
+struct mlx5e_post_meter_priv;
+struct mlx5e_flow_meter_aso_obj;
+struct mlx5e_flow_meters;
+struct mlx5_flow_attr;
+
+enum mlx5e_flow_meter_mode {
+ MLX5_RATE_LIMIT_BPS,
+ MLX5_RATE_LIMIT_PPS,
+};
+
+struct mlx5e_flow_meter_params {
+ enum mlx5e_flow_meter_mode mode;
+ /* police action index */
+ u32 index;
+ u64 rate;
+ u64 burst;
+};
+
+struct mlx5e_flow_meter_handle {
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ u32 obj_id;
+ u8 idx;
+
+ int refcnt;
+ struct hlist_node hlist;
+ struct mlx5e_flow_meter_params params;
+
+ struct mlx5_fc *green_counter;
+ struct mlx5_fc *red_counter;
+};
+
+struct mlx5e_meter_attr {
+ struct mlx5e_flow_meter_params params;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5e_post_meter_priv *post_meter;
+};
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params);
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter);
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params);
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters);
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_action);
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters);
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse);
+
+#endif /* __MLX5_EN_FLOW_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
new file mode 100644
index 000000000..0290e0dea
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "en_tc.h"
+#include "post_act.h"
+#include "mlx5_core.h"
+#include "fs_core.h"
+
+struct mlx5e_post_act {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_flow_table *ft;
+ struct mlx5e_priv *priv;
+ struct xarray ids;
+};
+
+struct mlx5e_post_act_handle {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_handle *rule;
+ u32 id;
+};
+
+#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ enum fs_flow_table_type table_type = ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FS_FT_FDB : FS_FT_NIC_RX;
+ struct mlx5e_post_act *post_act;
+ int err;
+
+ if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) {
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
+ mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+ err = -EOPNOTSUPP;
+ goto err_check;
+ }
+
+ post_act = kzalloc(sizeof(*post_act), GFP_KERNEL);
+ if (!post_act) {
+ err = -ENOMEM;
+ goto err_check;
+ }
+ post_act->ft = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(post_act->ft)) {
+ err = PTR_ERR(post_act->ft);
+ mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err);
+ goto err_ft;
+ }
+ post_act->chains = chains;
+ post_act->ns_type = ns_type;
+ post_act->priv = priv;
+ xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1);
+ return post_act;
+
+err_ft:
+ kfree(post_act);
+err_check:
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
+{
+ if (IS_ERR_OR_NULL(post_act))
+ return;
+
+ xa_destroy(&post_act->ids);
+ mlx5_chains_destroy_global_table(post_act->chains, post_act->ft);
+ kfree(post_act);
+}
+
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Post action rule matches on fte_id and executes original rule's tc rule action */
+ mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK);
+
+ handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr);
+ if (IS_ERR(handle->rule)) {
+ err = PTR_ERR(handle->rule);
+ netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
+ goto err_rule;
+ }
+
+ kvfree(spec);
+ return 0;
+
+err_rule:
+ kvfree(spec);
+ return err;
+}
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr)
+{
+ struct mlx5e_post_act_handle *handle;
+ int err;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle) {
+ kfree(handle);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = post_act->ft;
+ post_attr->inner_match_level = MLX5_MATCH_NONE;
+ post_attr->outer_match_level = MLX5_MATCH_NONE;
+ post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+
+ handle->ns_type = post_act->ns_type;
+ /* Splits were handled before post action */
+ if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ post_attr->esw_attr->split_count = 0;
+
+ err = xa_alloc(&post_act->ids, &handle->id, post_attr,
+ XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL);
+ if (err)
+ goto err_xarray;
+
+ handle->attr = post_attr;
+
+ return handle;
+
+err_xarray:
+ kfree(handle);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr);
+ handle->rule = NULL;
+}
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
+{
+ if (!IS_ERR_OR_NULL(handle->rule))
+ mlx5e_tc_post_act_unoffload(post_act, handle);
+ xa_erase(&post_act->ids, handle->id);
+ kfree(handle);
+}
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act)
+{
+ return post_act->ft;
+}
+
+/* Allocate a header modify action to write the post action handle fte id to a register. */
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts)
+{
+ return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
new file mode 100644
index 000000000..40b8df184
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_POST_ACTION_H__
+#define __MLX5_POST_ACTION_H__
+
+#include "en.h"
+#include "lib/fs_chains.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_priv;
+struct mlx5e_tc_mod_hdr_acts;
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act);
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr);
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
+
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts);
+
+#endif /* __MLX5_POST_ACTION_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
new file mode 100644
index 000000000..8b77e8228
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "post_meter.h"
+#include "en/tc/post_act.h"
+
+#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG)
+#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG)
+
+struct mlx5e_post_meter_priv {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_handle *fwd_green_rule;
+ struct mlx5_flow_handle *drop_red_rule;
+};
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return post_meter->ft;
+}
+
+static int
+mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+
+ root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type);
+ if (!root_ns) {
+ mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(post_meter->ft)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n");
+ return PTR_ERR(post_meter->ft);
+ }
+
+ return 0;
+}
+
+static int
+mlx5e_post_meter_fg_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *misc2, *match_criteria;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in);
+ if (IS_ERR(post_meter->fg)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n");
+ err = PTR_ERR(post_meter->fg);
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static int
+mlx5e_post_meter_rules_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[0].counter_id = mlx5_fc_id(red_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n");
+ err = PTR_ERR(rule);
+ goto err_red;
+ }
+ post_meter->drop_red_rule = rule;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[0].ft = mlx5e_tc_post_act_get_ft(post_act);
+ dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[1].counter_id = mlx5_fc_id(green_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n");
+ err = PTR_ERR(rule);
+ goto err_green;
+ }
+ post_meter->fwd_green_rule = rule;
+
+ kvfree(spec);
+ return 0;
+
+err_green:
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+err_red:
+ kvfree(spec);
+ return err;
+}
+
+static void
+mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+ mlx5_del_flow_rules(post_meter->fwd_green_rule);
+}
+
+static void
+mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_group(post_meter->fg);
+}
+
+static void
+mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_table(post_meter->ft);
+}
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5e_post_meter_priv *post_meter;
+ int err;
+
+ post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL);
+ if (!post_meter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_post_meter_table_create(priv, ns_type, post_meter);
+ if (err)
+ goto err_ft;
+
+ err = mlx5e_post_meter_fg_create(priv, post_meter);
+ if (err)
+ goto err_fg;
+
+ err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter,
+ red_counter);
+ if (err)
+ goto err_rules;
+
+ return post_meter;
+
+err_rules:
+ mlx5e_post_meter_fg_destroy(post_meter);
+err_fg:
+ mlx5e_post_meter_table_destroy(post_meter);
+err_ft:
+ kfree(post_meter);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5e_post_meter_rules_destroy(post_meter);
+ mlx5e_post_meter_fg_destroy(post_meter);
+ mlx5e_post_meter_table_destroy(post_meter);
+ kfree(post_meter);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
new file mode 100644
index 000000000..34d0e4b9f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_POST_METER_H__
+#define __MLX5_EN_POST_METER_H__
+
+#define packet_color_to_reg { \
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \
+ .moffset = 0, \
+ .mlen = 8, \
+ .soffset = MLX5_BYTE_OFF(fte_match_param, \
+ misc_parameters_2.metadata_reg_c_5), \
+}
+
+struct mlx5e_post_meter_priv;
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter);
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter);
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter);
+
+#endif /* __MLX5_EN_POST_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
new file mode 100644
index 000000000..c57b09727
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/skbuff.h>
+#include <net/psample.h>
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "en/tc/act/sample.h"
+#include "en/mod_hdr.h"
+#include "sample.h"
+#include "eswitch.h"
+#include "en_tc.h"
+#include "fs_core.h"
+
+#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
+
+static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
+ .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
+ .max_num_groups = 0, /* default num of groups */
+ .flags = 0,
+};
+
+struct mlx5e_tc_psample {
+ struct mlx5_eswitch *esw;
+ struct mlx5_flow_table *termtbl;
+ struct mlx5_flow_handle *termtbl_rule;
+ DECLARE_HASHTABLE(hashtbl, 8);
+ struct mutex ht_lock; /* protect hashtbl */
+ DECLARE_HASHTABLE(restore_hashtbl, 8);
+ struct mutex restore_lock; /* protect restore_hashtbl */
+ struct mlx5e_post_act *post_act;
+};
+
+struct mlx5e_sampler {
+ struct hlist_node hlist;
+ u32 sampler_id;
+ u32 sample_ratio;
+ u32 sample_table_id;
+ u32 default_table_id;
+ int count;
+};
+
+struct mlx5e_sample_flow {
+ struct mlx5e_sampler *sampler;
+ struct mlx5e_sample_restore *restore;
+ struct mlx5_flow_attr *pre_attr;
+ struct mlx5_flow_handle *pre_rule;
+ struct mlx5_flow_attr *post_attr;
+ struct mlx5_flow_handle *post_rule;
+};
+
+struct mlx5e_sample_restore {
+ struct hlist_node hlist;
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_flow_handle *rule;
+ u32 obj_id;
+ int count;
+};
+
+static int
+sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_act act = {};
+ int err;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) {
+ mlx5_core_warn(dev, "termination table is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ mlx5_core_warn(dev, "failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 1;
+ ft_attr.level = 1;
+ tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(tc_psample->termtbl)) {
+ err = PTR_ERR(tc_psample->termtbl);
+ mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
+ return err;
+ }
+
+ act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.vport.num = esw->manager_vport;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
+ if (IS_ERR(tc_psample->termtbl_rule)) {
+ err = PTR_ERR(tc_psample->termtbl_rule);
+ mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample)
+{
+ mlx5_del_flow_rules(tc_psample->termtbl_rule);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
+}
+
+static int
+sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler)
+{
+ u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u64 general_obj_types;
+ void *obj;
+ int err;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER))
+ return -EOPNOTSUPP;
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
+ return -EOPNOTSUPP;
+
+ obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object);
+ MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB);
+ MLX5_SET(sampler_obj, obj, ignore_flow_level, 1);
+ MLX5_SET(sampler_obj, obj, level, 1);
+ MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio);
+ MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id);
+ MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id);
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return err;
+}
+
+static void
+sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static u32
+sampler_hash(u32 sample_ratio, u32 default_table_id)
+{
+ return jhash_2words(sample_ratio, default_table_id, 0);
+}
+
+static int
+sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2)
+{
+ return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
+}
+
+static struct mlx5e_sampler *
+sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id)
+{
+ struct mlx5e_sampler *sampler;
+ u32 hash_key;
+ int err;
+
+ mutex_lock(&tc_psample->ht_lock);
+ hash_key = sampler_hash(sample_ratio, default_table_id);
+ hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key)
+ if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
+ sample_ratio, default_table_id))
+ goto add_ref;
+
+ sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
+ if (!sampler) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ sampler->sample_table_id = tc_psample->termtbl->id;
+ sampler->default_table_id = default_table_id;
+ sampler->sample_ratio = sample_ratio;
+
+ err = sampler_obj_create(tc_psample->esw->dev, sampler);
+ if (err)
+ goto err_create;
+
+ hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key);
+
+add_ref:
+ sampler->count++;
+ mutex_unlock(&tc_psample->ht_lock);
+ return sampler;
+
+err_create:
+ kfree(sampler);
+err_alloc:
+ mutex_unlock(&tc_psample->ht_lock);
+ return ERR_PTR(err);
+}
+
+static void
+sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
+{
+ mutex_lock(&tc_psample->ht_lock);
+ if (--sampler->count == 0) {
+ hash_del(&sampler->hlist);
+ sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id);
+ kfree(sampler);
+ }
+ mutex_unlock(&tc_psample->ht_lock);
+}
+
+/* obj_id is used to restore the sample parameters.
+ * Set fte_id in original flow table, then match it in the default table.
+ * Only set it for NICs can preserve reg_c or decap action. For other cases,
+ * use the same match in the default table.
+ * Use one header rewrite for both obj_id and fte_id.
+ */
+static struct mlx5_modify_hdr *
+sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ CHAIN_TO_REG, obj_id);
+ if (err)
+ goto err_set_regc0;
+
+ modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts->num_actions,
+ mod_acts->actions);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ goto err_modify_hdr;
+ }
+
+ mlx5e_mod_hdr_dealloc(mod_acts);
+ return modify_hdr;
+
+err_modify_hdr:
+ mlx5e_mod_hdr_dealloc(mod_acts);
+err_set_regc0:
+ return ERR_PTR(err);
+}
+
+static struct mlx5e_sample_restore *
+sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_core_dev *mdev = esw->dev;
+ struct mlx5e_sample_restore *restore;
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ mutex_lock(&tc_psample->restore_lock);
+ hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id)
+ if (restore->obj_id == obj_id)
+ goto add_ref;
+
+ restore = kzalloc(sizeof(*restore), GFP_KERNEL);
+ if (!restore) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ restore->obj_id = obj_id;
+
+ modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts);
+ if (IS_ERR(modify_hdr)) {
+ err = PTR_ERR(modify_hdr);
+ goto err_modify_hdr;
+ }
+ restore->modify_hdr = modify_hdr;
+
+ restore->rule = esw_add_restore_rule(esw, obj_id);
+ if (IS_ERR(restore->rule)) {
+ err = PTR_ERR(restore->rule);
+ goto err_restore;
+ }
+
+ hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id);
+add_ref:
+ restore->count++;
+ mutex_unlock(&tc_psample->restore_lock);
+ return restore;
+
+err_restore:
+ mlx5_modify_header_dealloc(mdev, restore->modify_hdr);
+err_modify_hdr:
+ kfree(restore);
+err_alloc:
+ mutex_unlock(&tc_psample->restore_lock);
+ return ERR_PTR(err);
+}
+
+static void
+sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore)
+{
+ mutex_lock(&tc_psample->restore_lock);
+ if (--restore->count == 0)
+ hash_del(&restore->hlist);
+ mutex_unlock(&tc_psample->restore_lock);
+
+ if (!restore->count) {
+ mlx5_del_flow_rules(restore->rule);
+ mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr);
+ kfree(restore);
+ }
+}
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
+{
+ u32 trunc_size = mapped_obj->sample.trunc_size;
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+
+ md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len;
+ md.in_ifindex = skb->dev->ifindex;
+ psample_group.group_num = mapped_obj->sample.group_id;
+ psample_group.net = &init_net;
+ skb_push(skb, skb->mac_len);
+
+ psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
+}
+
+static int
+add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr,
+ u32 *default_tbl_id)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB);
+ struct mlx5_vport_tbl_attr per_vport_tbl_attr;
+ struct mlx5_flow_table *default_tbl;
+ struct mlx5_flow_attr *post_attr;
+ int err;
+
+ /* Allocate default table per vport, chain and prio. Otherwise, there is
+ * only one default table for the same sampler object. Rules with different
+ * prio and chain may overlap. For CT sample action, per vport default
+ * table is needed to resotre the metadata.
+ */
+ per_vport_tbl_attr.chain = attr->chain;
+ per_vport_tbl_attr.prio = attr->prio;
+ per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
+ per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
+ if (IS_ERR(default_tbl)) {
+ err = PTR_ERR(default_tbl);
+ goto err_default_tbl;
+ }
+ *default_tbl_id = default_tbl->id;
+
+ post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!post_attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+ sample_flow->post_attr = post_attr;
+ memcpy(post_attr, attr, attr_sz);
+ /* Perform the original matches on the default table.
+ * Offload all actions except the sample action.
+ */
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = default_tbl;
+ post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT;
+
+ /* When offloading sample and encap action, if there is no valid
+ * neigh data struct, a slow path rule is offloaded first. Source
+ * port metadata match is set at that time. A per vport table is
+ * already allocated. No need to match it again. So clear the source
+ * port metadata match.
+ */
+ mlx5_eswitch_clear_rule_source_port(esw, spec);
+ sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr);
+ if (IS_ERR(sample_flow->post_rule)) {
+ err = PTR_ERR(sample_flow->post_rule);
+ goto err_rule;
+ }
+ return 0;
+
+err_rule:
+ kfree(post_attr);
+err_attr:
+ mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
+err_default_tbl:
+ return err;
+}
+
+static void
+del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_vport_tbl_attr tbl_attr;
+
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr);
+ kfree(sample_flow->post_attr);
+ tbl_attr.chain = attr->chain;
+ tbl_attr.prio = attr->prio;
+ tbl_attr.vport = esw_attr->in_rep->vport;
+ tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ mlx5_esw_vporttbl_put(esw, &tbl_attr);
+}
+
+/* For the following typical flow table:
+ *
+ * +-------------------------------+
+ * + original flow table +
+ * +-------------------------------+
+ * + original match +
+ * +-------------------------------+
+ * + sample action + other actions +
+ * +-------------------------------+
+ *
+ * We translate the tc filter with sample action to the following HW model:
+ *
+ * +---------------------+
+ * + original flow table +
+ * +---------------------+
+ * + original match +
+ * +---------------------+
+ * | set fte_id (if reg_c preserve cap)
+ * | do decap (if required)
+ * v
+ * +------------------------------------------------+
+ * + Flow Sampler Object +
+ * +------------------------------------------------+
+ * + sample ratio +
+ * +------------------------------------------------+
+ * + sample table id | default table id +
+ * +------------------------------------------------+
+ * | |
+ * v v
+ * +-----------------------------+ +-------------------+
+ * + sample table + + default table +
+ * +-----------------------------+ +-------------------+
+ * + forward to management vport + |
+ * +-----------------------------+ |
+ * +-------+------+
+ * | |reg_c preserve cap
+ * | |or decap action
+ * v v
+ * +-----------------+ +-------------+
+ * + per vport table + + post action +
+ * +-----------------+ +-------------+
+ * + original match +
+ * +-----------------+
+ * + other actions +
+ * +-----------------+
+ */
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_esw_flow_attr *pre_esw_attr;
+ struct mlx5_mapped_obj restore_obj = {};
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5e_sample_attr *sample_attr;
+ struct mlx5_flow_attr *pre_attr;
+ struct mlx5_eswitch *esw;
+ u32 default_tbl_id;
+ u32 obj_id;
+ int err;
+
+ if (IS_ERR_OR_NULL(tc_psample))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
+ if (!sample_flow)
+ return ERR_PTR(-ENOMEM);
+ sample_attr = &attr->sample_attr;
+ sample_attr->sample_flow = sample_flow;
+
+ /* For NICs with reg_c_preserve support or decap action, use
+ * post action instead of the per vport, chain and prio table.
+ * Only match the fte id instead of the same match in the
+ * original flow table.
+ */
+ esw = tc_psample->esw;
+ if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) {
+ struct mlx5_flow_table *ft;
+
+ ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
+ default_tbl_id = ft->id;
+ } else {
+ err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
+ if (err)
+ goto err_post_rule;
+ }
+
+ /* Create sampler object. */
+ sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id);
+ if (IS_ERR(sample_flow->sampler)) {
+ err = PTR_ERR(sample_flow->sampler);
+ goto err_sampler;
+ }
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+
+ /* Create an id mapping reg_c0 value to sample object. */
+ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
+ restore_obj.sample.group_id = sample_attr->group_num;
+ restore_obj.sample.rate = sample_attr->rate;
+ restore_obj.sample.trunc_size = sample_attr->trunc_size;
+ restore_obj.sample.tunnel_id = attr->tunnel_id;
+ err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
+ if (err)
+ goto err_obj_id;
+ sample_attr->restore_obj_id = obj_id;
+
+ /* Create sample restore context. */
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+ sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts);
+ if (IS_ERR(sample_flow->restore)) {
+ err = PTR_ERR(sample_flow->restore);
+ goto err_sample_restore;
+ }
+
+ /* Perform the original matches on the original table. Offload the
+ * sample action. The destination is the sampler object.
+ */
+ pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!pre_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre_flow_attr;
+ }
+ pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ /* For decap action, do decap in the original flow table instead of the
+ * default flow table.
+ */
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
+ pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
+ pre_attr->inner_match_level = attr->inner_match_level;
+ pre_attr->outer_match_level = attr->outer_match_level;
+ pre_attr->chain = attr->chain;
+ pre_attr->prio = attr->prio;
+ pre_attr->ft = attr->ft;
+ pre_attr->sample_attr = *sample_attr;
+ pre_esw_attr = pre_attr->esw_attr;
+ pre_esw_attr->in_mdev = esw_attr->in_mdev;
+ pre_esw_attr->in_rep = esw_attr->in_rep;
+ sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
+ if (IS_ERR(sample_flow->pre_rule)) {
+ err = PTR_ERR(sample_flow->pre_rule);
+ goto err_pre_offload_rule;
+ }
+ sample_flow->pre_attr = pre_attr;
+
+ return sample_flow->pre_rule;
+
+err_pre_offload_rule:
+ kfree(pre_attr);
+err_alloc_pre_flow_attr:
+ sample_restore_put(tc_psample, sample_flow->restore);
+err_sample_restore:
+ mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
+err_obj_id:
+ sampler_put(tc_psample, sample_flow->sampler);
+err_sampler:
+ if (sample_flow->post_rule)
+ del_post_rule(esw, sample_flow, attr);
+err_post_rule:
+ kfree(sample_flow);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5_eswitch *esw;
+
+ if (IS_ERR_OR_NULL(tc_psample))
+ return;
+
+ /* The following delete order can't be changed, otherwise,
+ * will hit fw syndromes.
+ */
+ esw = tc_psample->esw;
+ sample_flow = attr->sample_attr.sample_flow;
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
+
+ sample_restore_put(tc_psample, sample_flow->restore);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id);
+ sampler_put(tc_psample, sample_flow->sampler);
+ if (sample_flow->post_rule)
+ del_post_rule(esw, sample_flow, attr);
+
+ kfree(sample_flow->pre_attr);
+ kfree(sample_flow);
+}
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{
+ struct mlx5e_tc_psample *tc_psample;
+ int err;
+
+ tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL);
+ if (!tc_psample)
+ return ERR_PTR(-ENOMEM);
+ if (IS_ERR_OR_NULL(post_act)) {
+ err = PTR_ERR(post_act);
+ goto err_post_act;
+ }
+ tc_psample->post_act = post_act;
+ tc_psample->esw = esw;
+ err = sampler_termtbl_create(tc_psample);
+ if (err)
+ goto err_post_act;
+
+ mutex_init(&tc_psample->ht_lock);
+ mutex_init(&tc_psample->restore_lock);
+
+ return tc_psample;
+
+err_post_act:
+ kfree(tc_psample);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample)
+{
+ if (IS_ERR_OR_NULL(tc_psample))
+ return;
+
+ mutex_destroy(&tc_psample->restore_lock);
+ mutex_destroy(&tc_psample->ht_lock);
+ sampler_termtbl_destroy(tc_psample);
+ kfree(tc_psample);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
new file mode 100644
index 000000000..a569367ea
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_SAMPLE_H__
+#define __MLX5_EN_TC_SAMPLE_H__
+
+#include "eswitch.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_psample;
+struct mlx5e_post_act;
+
+struct mlx5e_sample_attr {
+ u32 group_num;
+ u32 rate;
+ u32 trunc_size;
+ u32 restore_obj_id;
+ u32 sampler_id;
+ struct mlx5e_sample_flow *sample_flow;
+};
+
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+
+#else /* CONFIG_MLX5_TC_SAMPLE */
+
+static inline struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr) {}
+
+static inline struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {}
+
+static inline void
+mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {}
+
+#endif /* CONFIG_MLX5_TC_SAMPLE */
+#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
new file mode 100644
index 000000000..f01f7dfdb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -0,0 +1,2272 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <linux/workqueue.h>
+#include <linux/refcount.h>
+#include <linux/xarray.h>
+#include <linux/if_macvlan.h>
+#include <linux/debugfs.h>
+
+#include "lib/fs_chains.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+
+#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
+#define MLX5_CT_STATE_TRK_BIT BIT(2)
+#define MLX5_CT_STATE_NAT_BIT BIT(3)
+#define MLX5_CT_STATE_REPLY_BIT BIT(4)
+#define MLX5_CT_STATE_RELATED_BIT BIT(5)
+#define MLX5_CT_STATE_INVALID_BIT BIT(6)
+
+#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
+#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
+
+/* Statically allocate modify actions for
+ * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
+ * This will be increased dynamically if needed (for the ipv6 snat + dnat).
+ */
+#define MLX5_CT_MIN_MOD_ACTS 10
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
+
+struct mlx5_tc_ct_debugfs {
+ struct {
+ atomic_t offloaded;
+ atomic_t rx_dropped;
+ } stats;
+
+ struct dentry *root;
+};
+
+struct mlx5_tc_ct_priv {
+ struct mlx5_core_dev *dev;
+ const struct net_device *netdev;
+ struct mod_hdr_tbl *mod_hdr_tbl;
+ struct xarray tuple_ids;
+ struct rhashtable zone_ht;
+ struct rhashtable ct_tuples_ht;
+ struct rhashtable ct_tuples_nat_ht;
+ struct mlx5_flow_table *ct;
+ struct mlx5_flow_table *ct_nat;
+ struct mlx5e_post_act *post_act;
+ struct mutex control_lock; /* guards parallel adds/dels */
+ struct mapping_ctx *zone_mapping;
+ struct mapping_ctx *labels_mapping;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_ct_fs *fs;
+ struct mlx5_ct_fs_ops *fs_ops;
+ spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
+
+ struct mlx5_tc_ct_debugfs debugfs;
+};
+
+struct mlx5_ct_flow {
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_flow_handle *pre_ct_rule;
+ struct mlx5_ct_ft *ft;
+ u32 chain_mapping;
+};
+
+struct mlx5_ct_zone_rule {
+ struct mlx5_ct_fs_rule *rule;
+ struct mlx5e_mod_hdr_handle *mh;
+ struct mlx5_flow_attr *attr;
+ bool nat;
+};
+
+struct mlx5_tc_ct_pre {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *flow_grp;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_modify_hdr *modify_hdr;
+};
+
+struct mlx5_ct_ft {
+ struct rhash_head node;
+ u16 zone;
+ u32 zone_restore_id;
+ refcount_t refcount;
+ struct nf_flowtable *nf_ft;
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct rhashtable ct_entries_ht;
+ struct mlx5_tc_ct_pre pre_ct;
+ struct mlx5_tc_ct_pre pre_ct_nat;
+};
+
+struct mlx5_ct_tuple {
+ u16 addr_type;
+ __be16 n_proto;
+ u8 ip_proto;
+ struct {
+ union {
+ __be32 src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ __be32 dst_v4;
+ struct in6_addr dst_v6;
+ };
+ } ip;
+ struct {
+ __be16 src;
+ __be16 dst;
+ } port;
+
+ u16 zone;
+};
+
+struct mlx5_ct_counter {
+ struct mlx5_fc *counter;
+ refcount_t refcount;
+ bool is_shared;
+};
+
+enum {
+ MLX5_CT_ENTRY_FLAG_VALID,
+};
+
+struct mlx5_ct_entry {
+ struct rhash_head node;
+ struct rhash_head tuple_node;
+ struct rhash_head tuple_nat_node;
+ struct mlx5_ct_counter *counter;
+ unsigned long cookie;
+ unsigned long restore_cookie;
+ struct mlx5_ct_tuple tuple;
+ struct mlx5_ct_tuple tuple_nat;
+ struct mlx5_ct_zone_rule zone_rules[2];
+
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct work_struct work;
+
+ refcount_t refcnt;
+ unsigned long flags;
+};
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh);
+
+static const struct rhashtable_params cts_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, node),
+ .key_offset = offsetof(struct mlx5_ct_entry, cookie),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params zone_params = {
+ .head_offset = offsetof(struct mlx5_ct_ft, node),
+ .key_offset = offsetof(struct mlx5_ct_ft, zone),
+ .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params tuples_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params tuples_nat_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static bool
+mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
+{
+ return !!(entry->tuple_nat_node.next);
+}
+
+static int
+mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
+ u32 *labels, u32 *id)
+{
+ if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
+ *id = 0;
+ return 0;
+ }
+
+ if (mapping_add(ct_priv->labels_mapping, labels, id))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void
+mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
+{
+ if (id)
+ mapping_remove(ct_priv->labels_mapping, id);
+}
+
+static int
+mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
+{
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+
+ flow_rule_match_basic(rule, &basic);
+ flow_rule_match_control(rule, &control);
+
+ tuple->n_proto = basic.key->n_proto;
+ tuple->ip_proto = basic.key->ip_proto;
+ tuple->addr_type = control.key->addr_type;
+
+ if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ tuple->ip.src_v4 = match.key->src;
+ tuple->ip.dst_v4 = match.key->dst;
+ } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ tuple->ip.src_v6 = match.key->src;
+ tuple->ip.dst_v6 = match.key->dst;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (tuple->ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ tuple->port.src = match.key->src;
+ tuple->port.dst = match.key->dst;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ } else {
+ if (tuple->ip_proto != IPPROTO_GRE)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
+ struct flow_rule *rule)
+{
+ struct flow_action *flow_action = &rule->action;
+ struct flow_action_entry *act;
+ u32 offset, val, ip6_offset;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE)
+ continue;
+
+ offset = act->mangle.offset;
+ val = act->mangle.val;
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ if (offset == offsetof(struct iphdr, saddr))
+ tuple->ip.src_v4 = cpu_to_be32(val);
+ else if (offset == offsetof(struct iphdr, daddr))
+ tuple->ip.dst_v4 = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
+ ip6_offset /= 4;
+ if (ip6_offset < 4)
+ tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
+ else if (ip6_offset < 8)
+ tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ if (offset == offsetof(struct tcphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct tcphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ if (offset == offsetof(struct udphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct udphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct net_device *ndev)
+{
+ struct mlx5e_priv *other_priv = netdev_priv(ndev);
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ bool vf_rep, uplink_rep;
+
+ vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+ uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+
+ if (vf_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ if (uplink_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ if (is_vlan_dev(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
+ if (netif_is_macvlan(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
+ if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_rule *rule)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+
+ mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ match.mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ match.key->ip_proto);
+
+ ip_proto = match.key->ip_proto;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.key->src, sizeof(match.key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, sizeof(match.key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(match.key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(match.key->dst));
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+
+ flow_rule_match_tcp(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(match.mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(match.key->flags));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+
+ if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
+ if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
+ spec->flow_context.flow_source =
+ mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+ dev_put(dev);
+ }
+ }
+
+ return 0;
+}
+
+static void
+mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+{
+ if (entry->counter->is_shared &&
+ !refcount_dec_and_test(&entry->counter->refcount))
+ return;
+
+ mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
+ kfree(entry->counter);
+}
+
+static void
+mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry,
+ bool nat)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5_flow_attr *attr = zone_rule->attr;
+
+ ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
+
+ ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ kfree(attr);
+}
+
+static void
+mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+ atomic_dec(&ct_priv->debugfs.stats.offloaded);
+}
+
+static struct flow_action_entry *
+mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct flow_action_entry *act;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT_METADATA)
+ return act;
+ }
+
+ return NULL;
+}
+
+static int
+mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ u8 ct_state,
+ u32 mark,
+ u32 labels_id,
+ u8 zone_restore_id)
+{
+ enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ CTSTATE_TO_REG, ct_state);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ MARK_TO_REG, mark);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ LABELS_TO_REG, labels_id);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+
+ /* Make another copy of zone id in reg_b for
+ * NIC rx flows since we don't copy reg_c1 to
+ * reg_b upon miss.
+ */
+ if (ns != MLX5_FLOW_NAMESPACE_FDB) {
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
+static int
+mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
+ char *modact)
+{
+ u32 offset = act->mangle.offset, field;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct iphdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
+ else if (offset == offsetof(struct iphdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct ipv6hdr, saddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct tcphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
+ else if (offset == offsetof(struct tcphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct udphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
+ else if (offset == offsetof(struct udphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, offset, 0);
+ MLX5_SET(set_action_in, modact, field, field);
+ MLX5_SET(set_action_in, modact, data, act->mangle.val);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ struct flow_action_entry *act;
+ char *modact;
+ int err, i;
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE: {
+ modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
+
+ err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
+ if (err)
+ return err;
+
+ mod_acts->num_actions++;
+ }
+ break;
+
+ case FLOW_ACTION_CT_METADATA:
+ /* Handled earlier */
+ continue;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule,
+ struct mlx5e_mod_hdr_handle **mh,
+ u8 zone_restore_id, bool nat_table, bool has_nat)
+{
+ DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
+ DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
+ struct flow_action_entry *meta;
+ u16 ct_state = 0;
+ int err;
+
+ meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta)
+ return -EOPNOTSUPP;
+
+ err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
+ &attr->ct_attr.ct_labels_id);
+ if (err)
+ return -EOPNOTSUPP;
+ if (nat_table) {
+ if (has_nat) {
+ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
+ if (err)
+ goto err_mapping;
+ }
+
+ ct_state |= MLX5_CT_STATE_NAT_BIT;
+ }
+
+ ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
+ ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
+ err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
+ ct_state,
+ meta->ct_metadata.mark,
+ attr->ct_attr.ct_labels_id,
+ zone_restore_id);
+ if (err)
+ goto err_mapping;
+
+ if (nat_table && has_nat) {
+ attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
+ mod_acts.num_actions,
+ mod_acts.actions);
+ if (IS_ERR(attr->modify_hdr)) {
+ err = PTR_ERR(attr->modify_hdr);
+ goto err_mapping;
+ }
+
+ *mh = NULL;
+ } else {
+ *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl,
+ ct_priv->ns_type,
+ &mod_acts);
+ if (IS_ERR(*mh)) {
+ err = PTR_ERR(*mh);
+ goto err_mapping;
+ }
+ attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
+ }
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ return 0;
+
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ return err;
+}
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh)
+{
+ if (mh)
+ mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
+ else
+ mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ bool nat, u8 zone_restore_id)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_flow_attr *attr;
+ int err;
+
+ zone_rule->nat = nat;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+
+ err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
+ &zone_rule->mh,
+ zone_restore_id,
+ nat,
+ mlx5_tc_ct_entry_has_nat(entry));
+ if (err) {
+ ct_dbg("Failed to create ct entry mod hdr");
+ goto err_mod_hdr;
+ }
+
+ attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = 0;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ if (entry->tuple.ip_proto == IPPROTO_TCP ||
+ entry->tuple.ip_proto == IPPROTO_UDP)
+ attr->outer_match_level = MLX5_MATCH_L4;
+ else
+ attr->outer_match_level = MLX5_MATCH_L3;
+ attr->counter = entry->counter->counter;
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->in_mdev = priv->mdev;
+
+ mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
+
+ zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
+ if (IS_ERR(zone_rule->rule)) {
+ err = PTR_ERR(zone_rule->rule);
+ ct_dbg("Failed to add ct entry rule, nat: %d", nat);
+ goto err_rule;
+ }
+
+ zone_rule->attr = attr;
+
+ kvfree(spec);
+ ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
+
+ return 0;
+
+err_rule:
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+err_mod_hdr:
+ kfree(attr);
+err_attr:
+ kvfree(spec);
+ return err;
+}
+
+static bool
+mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
+{
+ return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+}
+
+static struct mlx5_ct_entry *
+mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
+{
+ struct mlx5_ct_entry *entry;
+
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
+ tuples_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt)) {
+ return entry;
+ } else if (!entry) {
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
+ tuple, tuples_nat_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt))
+ return entry;
+ }
+
+ return entry ? ERR_PTR(-EINVAL) : NULL;
+}
+
+static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+ tuples_ht_params);
+}
+
+static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ kfree(entry);
+}
+
+static void
+mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
+{
+ struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void
+__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
+ queue_work(entry->ct_priv->wq, &entry->work);
+}
+
+static struct mlx5_ct_counter *
+mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_ct_counter *counter;
+ int ret;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ counter->is_shared = false;
+ counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
+ if (IS_ERR(counter->counter)) {
+ ct_dbg("Failed to create counter for ct entry");
+ ret = PTR_ERR(counter->counter);
+ kfree(counter);
+ return ERR_PTR(ret);
+ }
+
+ return counter;
+}
+
+static struct mlx5_ct_counter *
+mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ struct mlx5_ct_tuple rev_tuple = entry->tuple;
+ struct mlx5_ct_counter *shared_counter;
+ struct mlx5_ct_entry *rev_entry;
+
+ /* get the reversed tuple */
+ swap(rev_tuple.port.src, rev_tuple.port.dst);
+
+ if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ __be32 tmp_addr = rev_tuple.ip.src_v4;
+
+ rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
+ rev_tuple.ip.dst_v4 = tmp_addr;
+ } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
+
+ rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
+ rev_tuple.ip.dst_v6 = tmp_addr;
+ } else {
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Use the same counter as the reverse direction */
+ spin_lock_bh(&ct_priv->ht_lock);
+ rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
+
+ if (IS_ERR(rev_entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ goto create_counter;
+ }
+
+ if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
+ ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
+ shared_counter = rev_entry->counter;
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(rev_entry);
+ return shared_counter;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+create_counter:
+
+ shared_counter = mlx5_tc_ct_counter_create(ct_priv);
+ if (IS_ERR(shared_counter))
+ return shared_counter;
+
+ shared_counter->is_shared = true;
+ refcount_set(&shared_counter->refcount, 1);
+ return shared_counter;
+}
+
+static int
+mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ u8 zone_restore_id)
+{
+ int err;
+
+ if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
+ entry->counter = mlx5_tc_ct_counter_create(ct_priv);
+ else
+ entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+
+ if (IS_ERR(entry->counter)) {
+ err = PTR_ERR(entry->counter);
+ return err;
+ }
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
+ zone_restore_id);
+ if (err)
+ goto err_orig;
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
+ zone_restore_id);
+ if (err)
+ goto err_nat;
+
+ atomic_inc(&ct_priv->debugfs.stats.offloaded);
+ return 0;
+
+err_nat:
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+err_orig:
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ struct flow_action_entry *meta_action;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+ int err;
+
+ meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta_action)
+ return -EOPNOTSUPP;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (entry && refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_put(entry);
+ return -EEXIST;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->tuple.zone = ft->zone;
+ entry->cookie = flow->cookie;
+ entry->restore_cookie = meta_action->ct_metadata.cookie;
+ refcount_set(&entry->refcnt, 2);
+ entry->ct_priv = ct_priv;
+
+ err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
+ if (err)
+ goto err_set;
+
+ memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
+ err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
+ if (err)
+ goto err_set;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+
+ err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
+ cts_ht_params);
+ if (err)
+ goto err_entries;
+
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+ if (err)
+ goto err_tuple;
+
+ if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ if (err)
+ goto err_tuple_nat;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
+ ft->zone_restore_id);
+ if (err)
+ goto err_rules;
+
+ set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+ mlx5_tc_ct_entry_put(entry); /* this function reference */
+
+ return 0;
+
+err_rules:
+ spin_lock_bh(&ct_priv->ht_lock);
+ if (mlx5_tc_ct_entry_has_nat(entry))
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node, tuples_nat_ht_params);
+err_tuple_nat:
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+err_tuple:
+ rhashtable_remove_fast(&ft->ct_entries_ht,
+ &entry->node,
+ cts_ht_params);
+err_entries:
+ spin_unlock_bh(&ct_priv->ht_lock);
+err_set:
+ kfree(entry);
+ if (err != -EEXIST)
+ netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(entry);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *f)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = f->cookie;
+ struct mlx5_ct_entry *entry;
+ u64 lastuse, packets, bytes;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+
+ mlx5_tc_ct_entry_put(entry);
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload *f = type_data;
+ struct mlx5_ct_ft *ft = cb_priv;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5_tc_ct_block_flow_offload_add(ft, f);
+ case FLOW_CLS_DESTROY:
+ return mlx5_tc_ct_block_flow_offload_del(ft, f);
+ case FLOW_CLS_STATS:
+ return mlx5_tc_ct_block_flow_offload_stats(ft, f);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static bool
+mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
+ u16 zone)
+{
+ struct flow_keys flow_keys;
+
+ skb_reset_network_header(skb);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
+
+ tuple->zone = zone;
+
+ if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
+ flow_keys.basic.ip_proto != IPPROTO_UDP &&
+ flow_keys.basic.ip_proto != IPPROTO_GRE)
+ return false;
+
+ if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
+ flow_keys.basic.ip_proto == IPPROTO_UDP) {
+ tuple->port.src = flow_keys.ports.src;
+ tuple->port.dst = flow_keys.ports.dst;
+ }
+ tuple->n_proto = flow_keys.basic.n_proto;
+ tuple->ip_proto = flow_keys.basic.ip_proto;
+
+ switch (flow_keys.basic.n_proto) {
+ case htons(ETH_P_IP):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
+ tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
+ break;
+
+ case htons(ETH_P_IPV6):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
+ tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
+ break;
+ default:
+ goto out;
+ }
+
+ return true;
+
+out:
+ return false;
+}
+
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+{
+ u32 ctstate = 0, ctstate_mask = 0;
+
+ mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
+ &ctstate, &ctstate_mask);
+
+ if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
+ return -EOPNOTSUPP;
+
+ ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+
+ return 0;
+}
+
+void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
+{
+ if (!priv || !ct_attr->ct_labels_id)
+ return;
+
+ mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
+}
+
+int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
+{
+ bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_dissector_key_ct *mask, *key;
+ u32 ctstate = 0, ctstate_mask = 0;
+ u16 ct_state_on, ct_state_off;
+ u16 ct_state, ct_state_mask;
+ struct flow_match_ct match;
+ u32 ct_labels[4];
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct matching isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_ct(rule, &match);
+
+ key = match.key;
+ mask = match.mask;
+
+ ct_state = key->ct_state;
+ ct_state_mask = mask->ct_state;
+
+ if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+ TCA_FLOWER_KEY_CT_FLAGS_NEW |
+ TCA_FLOWER_KEY_CT_FLAGS_REPLY |
+ TCA_FLOWER_KEY_CT_FLAGS_RELATED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "only ct_state trk, est, new and rpl are supported for offload");
+ return -EOPNOTSUPP;
+ }
+
+ ct_state_on = ct_state & ct_state_mask;
+ ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
+ trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
+ est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+ untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+
+ ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
+ ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
+
+ if (rel) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +rel isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (inv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +inv isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (new) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +new isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (mask->ct_zone)
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ key->ct_zone, MLX5_CT_ZONE_MASK);
+ if (ctstate_mask)
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+ if (mask->ct_mark)
+ mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
+ key->ct_mark, mask->ct_mark);
+ if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
+ mask->ct_labels[3]) {
+ ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
+ ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
+ ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
+ ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
+ if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
+ return -EOPNOTSUPP;
+ mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
+ MLX5_CT_LABELS_MASK);
+ }
+
+ return 0;
+}
+
+int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct action isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ attr->ct_attr.zone = act->ct.zone;
+ attr->ct_attr.ct_action = act->ct.action;
+ attr->ct_attr.nf_ft = act->ct.flow_table;
+
+ return 0;
+}
+
+static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table *ft = pre_ct->ft;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 ctstate;
+ u16 zone;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
+ err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
+ ZONE_TO_REG, zone);
+ if (err) {
+ ct_dbg("Failed to set zone register mapping");
+ goto err_mapping;
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
+ pre_mod_acts.num_actions,
+ pre_mod_acts.actions);
+
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct->modify_hdr = mod_hdr;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.modify_hdr = mod_hdr;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ /* add flow rule */
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ zone, MLX5_CT_ZONE_MASK);
+ ctstate = MLX5_CT_STATE_TRK_BIT;
+ if (nat)
+ ctstate |= MLX5_CT_STATE_NAT_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
+
+ dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct flow rule zone %d", zone);
+ goto err_flow_rule;
+ }
+ pre_ct->flow_rule = rule;
+
+ /* add miss rule */
+ dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct miss rule zone %d", zone);
+ goto err_miss_rule;
+ }
+ pre_ct->miss_rule = rule;
+
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return 0;
+
+err_miss_rule:
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+err_flow_rule:
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return err;
+}
+
+static void
+tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+ mlx5_del_flow_rules(pre_ct->miss_rule);
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *g;
+ u32 metadata_reg_c_2_mask;
+ u32 *flow_group_in;
+ void *misc;
+ int err;
+
+ ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ ct_dbg("Failed to get flow namespace");
+ return err;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to create pre ct table");
+ goto out_free;
+ }
+ pre_ct->ft = ft;
+
+ /* create flow group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria.misc_parameters_2);
+
+ metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
+ if (nat)
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
+ metadata_reg_c_2_mask);
+
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct group");
+ goto err_flow_grp;
+ }
+ pre_ct->flow_grp = g;
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct miss group");
+ goto err_miss_grp;
+ }
+ pre_ct->miss_grp = g;
+
+ err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
+ if (err)
+ goto err_add_rules;
+
+ kvfree(flow_group_in);
+ return 0;
+
+err_add_rules:
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+err_miss_grp:
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+err_flow_grp:
+ mlx5_destroy_flow_table(ft);
+out_free:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+ mlx5_destroy_flow_table(pre_ct->ft);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ int err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
+ if (err)
+ return err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
+ if (err)
+ goto err_pre_ct_nat;
+
+ return 0;
+
+err_pre_ct_nat:
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+}
+
+/* To avoid false lock dependency warning set the ct_entries_ht lock
+ * class different than the lock class of the ht being used when deleting
+ * last flow from a group and then deleting a group, we get into del_sw_flow_group()
+ * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
+ * it's different than the ht->mutex here.
+ */
+static struct lock_class_key ct_entries_ht_lock_key;
+
+static struct mlx5_ct_ft *
+mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
+ struct nf_flowtable *nf_ft)
+{
+ struct mlx5_ct_ft *ft;
+ int err;
+
+ ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
+ if (ft) {
+ refcount_inc(&ft->refcount);
+ return ft;
+ }
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
+ if (err)
+ goto err_mapping;
+
+ ft->zone = zone;
+ ft->nf_ft = nf_ft;
+ ft->ct_priv = ct_priv;
+ refcount_set(&ft->refcount, 1);
+
+ err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
+ if (err)
+ goto err_alloc_pre_ct;
+
+ err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
+ if (err)
+ goto err_init;
+
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
+
+ err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
+ zone_params);
+ if (err)
+ goto err_insert;
+
+ err = nf_flow_table_offload_add_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ if (err)
+ goto err_add_cb;
+
+ return ft;
+
+err_add_cb:
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+err_insert:
+ rhashtable_destroy(&ft->ct_entries_ht);
+err_init:
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+err_alloc_pre_ct:
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+err_mapping:
+ kfree(ft);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
+{
+ struct mlx5_ct_entry *entry = ptr;
+
+ mlx5_tc_ct_entry_put(entry);
+}
+
+static void
+mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+ if (!refcount_dec_and_test(&ft->refcount))
+ return;
+
+ flush_workqueue(ct_priv->wq);
+ nf_flow_table_offload_del_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+ rhashtable_free_and_destroy(&ft->ct_entries_ht,
+ mlx5_tc_ct_flush_ft_entry,
+ ct_priv);
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+ kfree(ft);
+}
+
+/* We translate the tc filter with CT action to the following HW model:
+ *
+ * +---------------------+
+ * + ft prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * | set chain miss mapping
+ * | set fte_id
+ * | set tunnel_id
+ * | do decap
+ * v
+ * +---------------------+
+ * + pre_ct/pre_ct_nat + if matches +-------------------------+
+ * + zone+nat match +---------------->+ post_act (see below) +
+ * +---------------------+ set zone +-------------------------+
+ * | set zone
+ * v
+ * +--------------------+
+ * + CT (nat or no nat) +
+ * + tuple + zone match +
+ * +--------------------+
+ * | set mark
+ * | set labels_id
+ * | set established
+ * | set zone_restore
+ * | do nat (if needed)
+ * v
+ * +--------------+
+ * + post_act + original filter actions
+ * + fte_id match +------------------------>
+ * +--------------+
+ */
+static struct mlx5_flow_handle *
+__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *orig_spec,
+ struct mlx5_flow_attr *attr)
+{
+ bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
+ u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_ct_flow *ct_flow;
+ int chain_mapping = 0, err;
+ struct mlx5_ct_ft *ft;
+
+ ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+ if (!ct_flow) {
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Register for CT established events */
+ ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
+ attr->ct_attr.nf_ft);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to register to ft callback");
+ goto err_ft;
+ }
+ ct_flow->ft = ft;
+
+ /* Base flow attributes of both rules on original rule attribute */
+ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->pre_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre;
+ }
+
+ pre_ct_attr = ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, attr_sz);
+ pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
+
+ /* Modify the original rule's action to fwd and modify, leave decap */
+ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Write chain miss tag for miss in ct table as we
+ * don't go though all prios of this chain as normal tc rules
+ * miss.
+ */
+ err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
+ &chain_mapping);
+ if (err) {
+ ct_dbg("Failed to get chain register mapping for chain");
+ goto err_get_chain;
+ }
+ ct_flow->chain_mapping = chain_mapping;
+
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
+ CHAIN_TO_REG, chain_mapping);
+ if (err) {
+ ct_dbg("Failed to set chain register mapping");
+ goto err_mapping;
+ }
+
+ /* If original flow is decap, we do it before going into ct table
+ * so add a rewrite for the tunnel match_id.
+ */
+ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ attr->chain == 0) {
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
+ ct_priv->ns_type,
+ TUNNEL_TO_REG,
+ attr->tunnel_id);
+ if (err) {
+ ct_dbg("Failed to set tunnel register mapping");
+ goto err_mapping;
+ }
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
+ pre_mod_acts->num_actions,
+ pre_mod_acts->actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct_attr->modify_hdr = mod_hdr;
+
+ /* Change original rule point to ct table */
+ pre_ct_attr->dest_chain = 0;
+ pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
+ ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
+ pre_ct_attr);
+ if (IS_ERR(ct_flow->pre_ct_rule)) {
+ err = PTR_ERR(ct_flow->pre_ct_rule);
+ ct_dbg("Failed to add pre ct rule");
+ goto err_insert_orig;
+ }
+
+ attr->ct_attr.ct_flow = ct_flow;
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+
+ return ct_flow->pre_ct_rule;
+
+err_insert_orig:
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+err_get_chain:
+ kfree(ct_flow->pre_ct_attr);
+err_alloc_pre:
+ mlx5_tc_ct_del_ft_cb(ct_priv, ft);
+err_ft:
+ kfree(ct_flow);
+ netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
+ return ERR_PTR(err);
+}
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ struct mlx5_flow_handle *rule;
+
+ if (!priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&priv->control_lock);
+ rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
+ mutex_unlock(&priv->control_lock);
+
+ return rule;
+}
+
+static void
+__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_flow *ct_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+ mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
+
+ kfree(ct_flow->pre_ct_attr);
+ kfree(ct_flow);
+}
+
+void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
+
+ /* We are called on error to clean up stuff from parsing
+ * but we don't have anything for now
+ */
+ if (!ct_flow)
+ return;
+
+ mutex_lock(&priv->control_lock);
+ __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
+ mutex_unlock(&priv->control_lock);
+}
+
+static int
+mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
+ int err;
+
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
+ ct_dbg("Using SMFS ct flow steering provider");
+ fs_ops = mlx5_ct_fs_smfs_ops_get();
+ }
+
+ ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
+ if (!ct_priv->fs)
+ return -ENOMEM;
+
+ ct_priv->fs->netdev = ct_priv->netdev;
+ ct_priv->fs->dev = ct_priv->dev;
+ ct_priv->fs_ops = fs_ops;
+
+ err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
+ if (err)
+ goto err_init;
+
+ return 0;
+
+err_init:
+ kfree(ct_priv->fs);
+ return err;
+}
+
+static int
+mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
+ const char **err_msg)
+{
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
+ /* vlan workaround should be avoided for multi chain rules.
+ * This is just a sanity check as pop vlan action should
+ * be supported by any FW that supports ignore_flow_level
+ */
+
+ *err_msg = "firmware vlan actions support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
+ fdb_modify_header_fwd_to_table)) {
+ /* CT always writes to registers which are mod header actions.
+ * Therefore, mod header and goto is required
+ */
+
+ *err_msg = "firmware fwd and modify support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *err_msg = "register loopback isn't supported";
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ const char *err_msg = NULL;
+ int err = 0;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ /* cannot restore chain ID on HW miss */
+
+ err_msg = "tc skb extension missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+#endif
+ if (IS_ERR_OR_NULL(post_act)) {
+ /* Ignore_flow_level support isn't supported by default for VFs and so post_act
+ * won't be supported. Skip showing error msg.
+ */
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
+ err_msg = "post action is missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
+
+out_err:
+ if (err && err_msg)
+ netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
+ return err;
+}
+
+static void
+mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+
+ ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
+ debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.offloaded);
+ debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.rx_dropped);
+}
+
+static void
+mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ debugfs_remove_recursive(ct_priv->debugfs.root);
+}
+
+#define INIT_ERR_PREFIX "tc ct offload init failed"
+
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct mlx5_core_dev *dev;
+ u64 mapping_id;
+ int err;
+
+ dev = priv->mdev;
+ err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
+ if (err)
+ goto err_support;
+
+ ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
+ if (!ct_priv)
+ goto err_alloc;
+
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+ sizeof(u16), 0, true);
+ if (IS_ERR(ct_priv->zone_mapping)) {
+ err = PTR_ERR(ct_priv->zone_mapping);
+ goto err_mapping_zone;
+ }
+
+ ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+ sizeof(u32) * 4, 0, true);
+ if (IS_ERR(ct_priv->labels_mapping)) {
+ err = PTR_ERR(ct_priv->labels_mapping);
+ goto err_mapping_labels;
+ }
+
+ spin_lock_init(&ct_priv->ht_lock);
+ ct_priv->ns_type = ns_type;
+ ct_priv->chains = chains;
+ ct_priv->netdev = priv->netdev;
+ ct_priv->dev = priv->mdev;
+ ct_priv->mod_hdr_tbl = mod_hdr;
+ ct_priv->ct = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct)) {
+ err = PTR_ERR(ct_priv->ct);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_tbl;
+ }
+
+ ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct_nat)) {
+ err = PTR_ERR(ct_priv->ct_nat);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct nat table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_nat_tbl;
+ }
+
+ ct_priv->post_act = post_act;
+ mutex_init(&ct_priv->control_lock);
+ if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
+ goto err_ct_zone_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
+ goto err_ct_tuples_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
+ goto err_ct_tuples_nat_ht;
+
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ err = mlx5_tc_ct_fs_init(ct_priv);
+ if (err)
+ goto err_init_fs;
+
+ mlx5_ct_tc_create_dbgfs(ct_priv);
+ return ct_priv;
+
+err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+err_ct_tuples_nat_ht:
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+err_ct_tuples_ht:
+ rhashtable_destroy(&ct_priv->zone_ht);
+err_ct_zone_ht:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+err_ct_nat_tbl:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+err_ct_tbl:
+ mapping_destroy(ct_priv->labels_mapping);
+err_mapping_labels:
+ mapping_destroy(ct_priv->zone_mapping);
+err_mapping_zone:
+ kfree(ct_priv);
+err_alloc:
+err_support:
+
+ return NULL;
+}
+
+void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_fs_chains *chains;
+
+ if (!ct_priv)
+ return;
+
+ destroy_workqueue(ct_priv->wq);
+ mlx5_ct_tc_remove_dbgfs(ct_priv);
+ chains = ct_priv->chains;
+
+ ct_priv->fs_ops->destroy(ct_priv->fs);
+ kfree(ct_priv->fs);
+
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+ mapping_destroy(ct_priv->zone_mapping);
+ mapping_destroy(ct_priv->labels_mapping);
+
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+ rhashtable_destroy(&ct_priv->zone_ht);
+ mutex_destroy(&ct_priv->control_lock);
+ kfree(ct_priv);
+}
+
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id)
+{
+ struct mlx5_ct_tuple tuple = {};
+ struct mlx5_ct_entry *entry;
+ u16 zone;
+
+ if (!ct_priv || !zone_restore_id)
+ return true;
+
+ if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
+ goto out_inc_drop;
+
+ if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
+ goto out_inc_drop;
+
+ spin_lock(&ct_priv->ht_lock);
+
+ entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
+ if (!entry) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+
+ if (IS_ERR(entry)) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+ spin_unlock(&ct_priv->ht_lock);
+
+ tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+ __mlx5_tc_ct_entry_put(entry);
+
+ return true;
+
+out_inc_drop:
+ atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
+ return false;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
new file mode 100644
index 000000000..5bbd6b928
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_CT_H__
+#define __MLX5_EN_TC_CT_H__
+
+#include <net/pkt_cls.h>
+#include <linux/mlx5/fs.h>
+#include <net/tc_act/tc_ct.h>
+
+#include "en.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_mod_hdr_acts;
+struct mlx5_rep_uplink_priv;
+struct mlx5e_tc_flow;
+struct mlx5e_priv;
+
+struct mlx5_fs_chains;
+struct mlx5_tc_ct_priv;
+struct mlx5_ct_flow;
+
+struct nf_flowtable;
+
+struct mlx5_ct_attr {
+ u16 zone;
+ u16 ct_action;
+ struct mlx5_ct_flow *ct_flow;
+ struct nf_flowtable *nf_ft;
+ u32 ct_labels_id;
+};
+
+#define zone_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
+ .moffset = 0,\
+ .mlen = 16,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_2),\
+}
+
+#define ctstate_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
+ .moffset = 16,\
+ .mlen = 16,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_2),\
+}
+
+#define mark_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\
+ .moffset = 0,\
+ .mlen = 32,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_3),\
+}
+
+#define labels_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\
+ .moffset = 0,\
+ .mlen = 32,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_4),\
+}
+
+/* 8 LSB of metadata C5 are reserved for packet color */
+#define fteid_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\
+ .moffset = 8,\
+ .mlen = 24,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_5),\
+}
+
+#define zone_restore_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\
+ .moffset = 0,\
+ .mlen = ESW_ZONE_ID_BITS,\
+ .soffset = MLX5_BYTE_OFF(fte_match_param,\
+ misc_parameters_2.metadata_reg_c_1),\
+}
+
+#define nic_zone_restore_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\
+ .moffset = 16,\
+ .mlen = ESW_ZONE_ID_BITS,\
+}
+
+#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG)
+#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG)
+
+#if IS_ENABLED(CONFIG_MLX5_TC_CT)
+
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act);
+void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv);
+
+void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr);
+
+int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack);
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
+int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id);
+
+int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts);
+
+#else /* CONFIG_MLX5_TC_CT */
+
+static inline struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ return NULL;
+}
+
+static inline void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
+{
+}
+
+static inline void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {}
+
+static inline int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+{
+ return 0;
+}
+
+static inline int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+}
+
+static inline bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id)
+{
+ if (!zone_restore_id)
+ return true;
+
+ return false;
+}
+
+#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */
+#endif /* __MLX5_EN_TC_CT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
new file mode 100644
index 000000000..2e42d7c54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_PRIV_H__
+#define __MLX5_EN_TC_PRIV_H__
+
+#include "en_tc.h"
+#include "en/tc/act/act.h"
+
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
+
+#define MLX5E_TC_MAX_SPLITS 1
+
+
+enum {
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
+ MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
+ MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
+ MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11,
+};
+
+struct mlx5e_tc_flow_parse_attr {
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
+ struct mlx5_flow_spec spec;
+ struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
+ struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_act_parse_state parse_state;
+};
+
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
+
+/* Helper struct for accessing a struct containing list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the contining struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+ struct mlx5e_encap_entry *e; /* attached encap instance */
+ struct list_head list;
+ int index;
+};
+
+struct encap_route_flow_item {
+ struct mlx5e_route_entry *r; /* attached route instance */
+ int index;
+};
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ struct mlx5e_priv *priv;
+ u64 cookie;
+ unsigned long flags;
+ struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+ /* flows sharing the same reformat object - currently mpls decap */
+ struct list_head l3_to_l2_reformat;
+ struct mlx5e_decap_entry *decap_reformat;
+
+ /* flows sharing same route entry */
+ struct list_head decap_routes;
+ struct mlx5e_route_entry *decap_route;
+ struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS];
+
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
+ struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
+ struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
+ struct list_head unready; /* flows not ready to be offloaded (e.g
+ * due to missing route)
+ */
+ struct net_device *orig_dev; /* netdev adding flow first */
+ int tmp_entry_index;
+ struct list_head tmp_list; /* temporary flow list used by neigh update */
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+ struct completion init_done;
+ struct completion del_hw_done;
+ struct mlx5_flow_attr *attr;
+ struct list_head attrs;
+ u32 chain_mapping;
+};
+
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
+
+struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow);
+
+void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow);
+int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow);
+
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow);
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow);
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
+
+static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before setting bit. */
+ smp_mb__before_atomic();
+ set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
+ unsigned long flag)
+{
+ /* test_and_set_bit() provides all necessary barriers */
+ return test_and_set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_test_and_set(flow, flag) \
+ __flow_flag_test_and_set(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before clearing bit. */
+ smp_mb__before_atomic();
+ clear_bit(flag, &flow->flags);
+}
+
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ bool ret = test_bit(flag, &flow->flags);
+
+ /* Read fields of flow structure only after checking flags. */
+ smp_mb__after_atomic();
+ return ret;
+}
+
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow);
+struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow);
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow);
+
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow);
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_get_int_port_priv(struct mlx5e_priv *priv);
+
+struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev);
+
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec);
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec);
+
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
new file mode 100644
index 000000000..83bb0811e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -0,0 +1,991 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/inet_ecn.h>
+#include <net/vxlan.h>
+#include <net/gre.h>
+#include <net/geneve.h>
+#include <net/bareudp.h>
+#include "en/tc_tun.h"
+#include "en/tc_priv.h"
+#include "en_tc.h"
+#include "rep/tc.h"
+#include "rep/neigh.h"
+#include "lag/lag.h"
+#include "lag/mp.h"
+
+struct mlx5e_tc_tun_route_attr {
+ struct net_device *out_dev;
+ struct net_device *route_dev;
+ union {
+ struct flowi4 fl4;
+ struct flowi6 fl6;
+ } fl;
+ struct neighbour *n;
+ u8 ttl;
+};
+
+#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}
+
+static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
+{
+ if (attr->n)
+ neigh_release(attr->n);
+ if (attr->route_dev)
+ dev_put(attr->route_dev);
+}
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return &vxlan_tunnel;
+ else if (netif_is_geneve(tunnel_dev))
+ return &geneve_tunnel;
+ else if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return &gre_tunnel;
+ else if (netif_is_bareudp(tunnel_dev))
+ return &mplsoudp_tunnel;
+ else
+ return NULL;
+}
+
+static int get_route_and_out_devs(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct net_device **route_dev,
+ struct net_device **out_dev)
+{
+ struct net_device *uplink_dev, *uplink_upper, *real_dev;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool dst_is_lag_dev;
+
+ real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+ /* mlx5_lag_is_sriov() is a blocking function which can't be called
+ * while holding rcu read lock. Take the net_device for correctness
+ * sake.
+ */
+ if (uplink_upper)
+ dev_hold(uplink_upper);
+ rcu_read_unlock();
+
+ dst_is_lag_dev = (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ real_dev == uplink_upper &&
+ mlx5_lag_is_sriov(priv->mdev));
+ if (uplink_upper)
+ dev_put(uplink_upper);
+
+ /* if the egress device isn't on the same HW e-switch or
+ * it's a LAG device, use the uplink
+ */
+ *route_dev = dev;
+ if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
+ dst_is_lag_dev || is_vlan_dev(*route_dev) ||
+ netif_is_ovs_master(*route_dev))
+ *out_dev = uplink_dev;
+ else if (mlx5e_eswitch_rep(dev) &&
+ mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
+ *out_dev = *route_dev;
+ else
+ return -EOPNOTSUPP;
+
+ if (!(mlx5e_eswitch_rep(*out_dev) &&
+ mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
+ return -EOPNOTSUPP;
+
+ if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct mlx5e_tc_tun_route_attr *attr)
+{
+ struct net_device *route_dev;
+ struct net_device *out_dev;
+ struct neighbour *n;
+ struct rtable *rt;
+
+#if IS_ENABLED(CONFIG_INET)
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *uplink_dev;
+ int ret;
+
+ if (mlx5_lag_is_multipath(mdev)) {
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
+ } else {
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
+
+ if (tunnel && tunnel->get_remote_ifindex)
+ attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
+ }
+
+ rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ if (rt->rt_type != RTN_UNICAST) {
+ ret = -ENETUNREACH;
+ goto err_rt_release;
+ }
+
+ if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
+ ret = -ENETUNREACH;
+ goto err_rt_release;
+ }
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_rt_release;
+ dev_hold(route_dev);
+
+ if (!attr->ttl)
+ attr->ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err_dev_release;
+ }
+
+ ip_rt_put(rt);
+ attr->route_dev = route_dev;
+ attr->out_dev = out_dev;
+ attr->n = n;
+ return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_rt_release:
+ ip_rt_put(rt);
+ return ret;
+}
+
+static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
+{
+ mlx5e_tc_tun_route_attr_cleanup(attr);
+}
+
+static const char *mlx5e_netdev_kind(struct net_device *dev)
+{
+ if (dev->rtnl_link_ops)
+ return dev->rtnl_link_ops->kind;
+ else
+ return "unknown";
+}
+
+static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ if (!e->tunnel) {
+ pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
+ return -EOPNOTSUPP;
+ }
+
+ return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
+}
+
+static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
+ struct mlx5e_encap_entry *e,
+ u16 proto)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ char *ip;
+
+ ether_addr_copy(eth->h_dest, e->h_dest);
+ ether_addr_copy(eth->h_source, dev->dev_addr);
+ if (is_vlan_dev(dev)) {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)
+ ((char *)eth + ETH_HLEN);
+ ip = (char *)vlan + VLAN_HLEN;
+ eth->h_proto = vlan_dev_vlan_proto(dev);
+ vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
+ vlan->h_vlan_encapsulated_proto = htons(proto);
+ } else {
+ eth->h_proto = htons(proto);
+ ip = (char *)eth + ETH_HLEN;
+ }
+
+ return ip;
+}
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ int ipv4_encap_size;
+ char *encap_header;
+ struct iphdr *ip;
+ u8 nud_state;
+ int err;
+
+ /* add the IP fields */
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ /* It's important to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv4_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto destroy_neigh_entry;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+}
+
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ int ipv4_encap_size;
+ char *encap_header;
+ struct iphdr *ip;
+ u8 nud_state;
+ int err;
+
+ /* add the IP fields */
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv4_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv4_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct mlx5e_tc_tun_route_attr *attr)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
+ struct net_device *route_dev;
+ struct net_device *out_dev;
+ struct dst_entry *dst;
+ struct neighbour *n;
+ int ret;
+
+ if (tunnel && tunnel->get_remote_ifindex)
+ attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
+ NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ if (!attr->ttl)
+ attr->ttl = ip6_dst_hoplimit(dst);
+
+ ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_dst_release;
+
+ dev_hold(route_dev);
+ n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err_dev_release;
+ }
+
+ dst_release(dst);
+ attr->out_dev = out_dev;
+ attr->route_dev = route_dev;
+ attr->n = n;
+ return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_dst_release:
+ dst_release(dst);
+ return ret;
+}
+
+static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
+{
+ mlx5e_tc_tun_route_attr_cleanup(attr);
+}
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state;
+ int err;
+
+ attr.ttl = tun_key->ttl;
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ /* It's important to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv6_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto destroy_neigh_entry;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+}
+
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct mlx5_pkt_reformat_params reformat_params;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state;
+ int err;
+
+ attr.ttl = tun_key->ttl;
+
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv6_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = ipv6_encap_size;
+ reformat_params.data = encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+}
+#endif
+
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *flow_attr,
+ struct net_device *filter_dev)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_int_port *int_port;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ u16 vport_num;
+ int err = 0;
+
+ if (flow_attr->tun_ip_version == 4) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
+ attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
+ err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->tun_ip_version == 6) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
+ attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
+ err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
+ }
+#endif
+ else
+ return 0;
+
+ if (err)
+ return err;
+
+ if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops &&
+ mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) {
+ err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ esw_attr->rx_tun_attr->decap_vport = vport_num;
+ } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
+ int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
+ attr.route_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS);
+ if (IS_ERR(int_port)) {
+ err = PTR_ERR(int_port);
+ goto out;
+ }
+ esw_attr->int_port = int_port;
+ }
+
+out:
+ if (flow_attr->tun_ip_version == 4)
+ mlx5e_route_lookup_ipv4_put(&attr);
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->tun_ip_version == 6)
+ mlx5e_route_lookup_ipv6_put(&attr);
+#endif
+ return err;
+}
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);
+
+ if (tunnel && tunnel->can_offload(priv))
+ return true;
+ else
+ return false;
+}
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
+
+ if (!tunnel) {
+ e->reformat_type = -1;
+ return -EOPNOTSUPP;
+ }
+
+ return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ u8 *match_level)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ struct netlink_ext_ack *extack = f->common.extack;
+ int err = 0;
+
+ if (!tunnel) {
+ netdev_warn(priv->netdev,
+ "decapsulation offload is not supported for %s net device\n",
+ mlx5e_netdev_kind(filter_dev));
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ *match_level = tunnel->match_level;
+
+ if (tunnel->parse_udp_ports) {
+ err = tunnel->parse_udp_ports(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+ if (tunnel->parse_tunnel) {
+ err = tunnel->parse_tunnel(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_basic key_basic = {};
+ struct flow_dissector_key_basic mask_basic = {
+ .n_proto = htons(0xFFFF),
+ };
+ struct flow_match_basic match_basic = {
+ .key = &key_basic, .mask = &mask_basic,
+ };
+ struct flow_match_control match;
+ u16 addr_type;
+
+ flow_rule_match_enc_control(rule, &match);
+ addr_type = match.key->addr_type;
+
+ /* For tunnel addr_type used same key id`s as for non-tunnel */
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_enc_ipv4_addrs(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->dst));
+
+ key_basic.n_proto = htons(ETH_P_IP);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
+ } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_enc_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ key_basic.n_proto = htons(ETH_P_IPV6);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_match_ip match;
+
+ flow_rule_match_enc_ip(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+ match.mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+ match.key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+ match.mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+ match.key->ttl);
+
+ if (match.mask->ttl &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB
+ (priv->mdev,
+ ft_field_support.outer_ipv4_ttl)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on TTL is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+
+ /* let software handle IP fragments */
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+
+ return 0;
+
+out:
+ return err;
+}
+
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ /* Full udp dst port must be given */
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "UDP tunnel decap filter must include enc_dst_port condition");
+ netdev_warn(priv->netdev,
+ "UDP tunnel decap filter must include enc_dst_port condition\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ if (memchr_inv(&enc_ports.mask->dst, 0xff,
+ sizeof(enc_ports.mask->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "UDP tunnel decap filter must match enc_dst_port fully");
+ netdev_warn(priv->netdev,
+ "UDP tunnel decap filter must match enc_dst_port fully\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* match on UDP protocol and dst port number */
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(enc_ports.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(enc_ports.key->dst));
+
+ /* UDP src port on outer header is generated by HW,
+ * so it is probably a bad idea to request matching it.
+ * Nonetheless, it is allowed.
+ */
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(enc_ports.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(enc_ports.key->src));
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
new file mode 100644
index 000000000..b38f693bb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUNNEL_H__
+#define __MLX5_EN_TC_TUNNEL_H__
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/fs.h>
+#include <net/pkt_cls.h>
+#include <linux/netlink.h>
+#include "en.h"
+#include "en_rep.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+ MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
+ MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+};
+
+struct mlx5e_encap_key {
+ const struct ip_tunnel_key *ip_tun_key;
+ struct mlx5e_tc_tunnel *tc_tunnel;
+};
+
+struct mlx5e_tc_tunnel {
+ int tunnel_type;
+ enum mlx5_flow_match_level match_level;
+
+ bool (*can_offload)(struct mlx5e_priv *priv);
+ int (*calc_hlen)(struct mlx5e_encap_entry *e);
+ int (*init_encap_attr)(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+ int (*generate_ip_tun_hdr)(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e);
+ int (*parse_udp_ports)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+ int (*parse_tunnel)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+ bool (*encap_info_equal)(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b);
+ int (*get_remote_ifindex)(struct net_device *mirred_dev);
+};
+
+extern struct mlx5e_tc_tunnel vxlan_tunnel;
+extern struct mlx5e_tc_tunnel geneve_tunnel;
+extern struct mlx5e_tc_tunnel gre_tunnel;
+extern struct mlx5e_tc_tunnel mplsoudp_tunnel;
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+#else
+static inline int
+mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
+static inline int
+mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
+#endif
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct net_device *filter_dev);
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev);
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ u8 *match_level);
+
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+
+bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b);
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif //__MLX5_EN_TC_TUNNEL_H__
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
new file mode 100644
index 000000000..907ad6ffe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -0,0 +1,1766 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <net/fib_notifier.h>
+#include <net/nexthop.h>
+#include "tc_tun_encap.h"
+#include "en_tc.h"
+#include "tc_tun.h"
+#include "rep/tc.h"
+#include "diag/en_tc_tracepoint.h"
+
+enum {
+ MLX5E_ROUTE_ENTRY_VALID = BIT(0),
+};
+
+static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ int out_index)
+{
+ struct net_device *route_dev;
+ int err = 0;
+
+ route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
+
+ if (!route_dev || !netif_is_ovs_master(route_dev) ||
+ attr->parse_attr->filter_dev == e->out_dev)
+ goto out;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, out_index);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+
+ return err;
+}
+
+struct mlx5e_route_key {
+ int ip_version;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } endpoint_ip;
+};
+
+struct mlx5e_route_entry {
+ struct mlx5e_route_key key;
+ struct list_head encap_entries;
+ struct list_head decap_flows;
+ u32 flags;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ int tunnel_dev_index;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_tc_tun_encap {
+ struct mlx5e_priv *priv;
+ struct notifier_block fib_nb;
+ spinlock_t route_lock; /* protects route_tbl */
+ unsigned long route_tbl_last_update;
+ DECLARE_HASHTABLE(route_tbl, 8);
+};
+
+static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
+{
+ return r->flags & MLX5E_ROUTE_ENTRY_VALID;
+}
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_rx_tun_attr *tun_attr;
+ void *daddr, *saddr;
+ u8 ip_version;
+
+ tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
+ if (!tun_attr)
+ return -ENOMEM;
+
+ esw_attr->rx_tun_attr = tun_attr;
+ ip_version = mlx5e_tc_get_ip_version(spec, true);
+
+ if (ip_version == 4) {
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ tun_attr->dst_ip.v4 = *(__be32 *)daddr;
+ tun_attr->src_ip.v4 = *(__be32 *)saddr;
+ if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
+ return 0;
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (ip_version == 6) {
+ int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
+ memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
+ if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
+ ipv6_addr_any(&tun_attr->src_ip.v6))
+ return 0;
+ }
+#endif
+ /* Only set the flag if both src and dst ip addresses exist. They are
+ * required to establish routing.
+ */
+ flow_flag_set(flow, TUN_RX);
+ flow->attr->tun_ip_version = ip_version;
+ return 0;
+}
+
+static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
+{
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+
+ return all_flow_encaps_valid;
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
+ return;
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = e->reformat_type;
+ reformat_params.size = e->encap_size;
+ reformat_params.data = e->encap_header;
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+ PTR_ERR(e->pkt_reformat));
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
+ continue;
+
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+
+ /* Do not offload flows with unresolved neighbors */
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ continue;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ continue;
+ }
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
+ if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ flow->rule[0] = rule;
+ /* was unset when slow path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+
+ /* Clear pkt_reformat before checking slow path flag. Because
+ * in next iteration, the same flow is already set slow path
+ * flag, but still need to clear the pkt_reformat.
+ */
+ if (flow_flag_test(flow, SLOW))
+ continue;
+
+ /* update from encap rule to slow path rule */
+ spec = &flow->attr->parse_attr->spec;
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ flow->rule[0] = rule;
+ /* was unset when fast path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+
+ /* we know that the encap is valid */
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
+}
+
+static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
+ struct list_head *flow_list,
+ int index)
+{
+ if (IS_ERR(mlx5e_flow_get(flow))) {
+ /* Flow is being deleted concurrently. Wait for it to be
+ * unoffloaded from hardware, otherwise deleting encap will
+ * fail.
+ */
+ wait_for_completion(&flow->del_hw_done);
+ return;
+ }
+ wait_for_completion(&flow->init_done);
+
+ flow->tmp_entry_index = index;
+ list_add(&flow->tmp_list, flow_list);
+}
+
+/* Takes reference to all flows attached to encap and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
+{
+ struct encap_flow_item *efi;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ mlx5e_take_tmp_flow(flow, flow_list, efi->index);
+ }
+}
+
+/* Takes reference to all flows attached to route and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
+ struct list_head *flow_list)
+{
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, &r->decap_flows, decap_routes)
+ mlx5e_take_tmp_flow(flow, flow_list, 0);
+}
+
+typedef bool (match_cb)(struct mlx5e_encap_entry *);
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e,
+ match_cb match)
+{
+ struct mlx5e_encap_entry *next = NULL;
+
+retry:
+ rcu_read_lock();
+
+ /* find encap with non-zero reference counter value */
+ for (next = e ?
+ list_next_or_null_rcu(&nhe->encap_list,
+ &e->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list) :
+ list_first_or_null_rcu(&nhe->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list);
+ next;
+ next = list_next_or_null_rcu(&nhe->encap_list,
+ &next->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list))
+ if (mlx5e_encap_take(next))
+ break;
+
+ rcu_read_unlock();
+
+ /* release starting encap */
+ if (e)
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
+ if (!next)
+ return next;
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+ if (!match(next)) {
+ e = next;
+ goto retry;
+ }
+
+ return next;
+}
+
+static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
+{
+ return e->flags & MLX5_ENCAP_ENTRY_VALID;
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
+}
+
+static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
+{
+ return e->compl_result >= 0;
+}
+
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_encap_entry *e = NULL;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = ipv6_stub->nd_tbl;
+#endif
+ else
+ return;
+
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
+ * next one.
+ */
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
+ struct encap_flow_item *efi, *tmp;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+ list_add(&flow->tmp_list, &flow_list);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ counter = mlx5e_tc_get_counter(flow);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ if (neigh_used) {
+ /* release current encap before breaking the loop */
+ mlx5e_encap_put(priv, e);
+ break;
+ }
+ }
+
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ WARN_ON(!list_empty(&e->flows));
+
+ if (e->compl_result > 0) {
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ }
+
+ kfree(e->tun_info);
+ kfree(e->encap_header);
+ kfree_rcu(e, rcu);
+}
+
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index);
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index)
+{
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ if (attr->esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5e_detach_encap_route(priv, flow, out_index);
+
+ /* flow wasn't fully initialized */
+ if (!e)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->encaps[out_index].list);
+ flow->encaps[out_index].e = NULL;
+ if (!refcount_dec_and_test(&e->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b)
+{
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
+ a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
+}
+
+static int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
+static int hash_encap_info(struct mlx5e_encap_key *key)
+{
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tc_tunnel->tunnel_type);
+}
+
+static int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
+
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_key e_key;
+ struct mlx5e_encap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
+ if (e->tunnel->encap_info_equal(&e_key, key) &&
+ mlx5e_encap_take(e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < out_index; i++) {
+ if (flow->encaps[i].e != e)
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ attr->dest_chain = 0;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
+ vport_num);
+ err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err >= 0) {
+ esw_attr->dests[out_index].src_port_rewrite_act_id = err;
+ err = 0;
+ }
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ int act_id = attr->dests[out_index].src_port_rewrite_act_id;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
+ vport_num);
+ mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ unsigned int ret;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ spin_lock_bh(&encap->route_lock);
+ ret = encap->route_tbl_last_update;
+ spin_unlock_bh(&encap->route_lock);
+ return ret;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ const struct ip_tunnel_info *tun_info;
+ const struct mlx5e_mpls_info *mpls_info;
+ unsigned long tbl_time_before = 0;
+ struct mlx5e_encap_entry *e;
+ struct mlx5e_encap_key key;
+ bool entry_created = false;
+ unsigned short family;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ mpls_info = &parse_attr->mpls_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+ key.ip_tun_key = &tun_info->key;
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+ if (!key.tc_tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(&key);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ e = mlx5e_encap_get(priv, &key, hash_key);
+
+ /* must verify if encap is valid or not */
+ if (e) {
+ /* Check that entry was not already attached to this flow */
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ wait_for_completion(&e->res_ready);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (e->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
+ goto attach_flow;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ refcount_set(&e->refcnt, 1);
+ init_completion(&e->res_ready);
+ entry_created = true;
+ INIT_LIST_HEAD(&e->route_list);
+
+ tun_info = mlx5e_dup_tun_info(tun_info);
+ if (!tun_info) {
+ err = -ENOMEM;
+ goto out_err_init;
+ }
+ e->tun_info = tun_info;
+ memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err_init;
+
+ INIT_LIST_HEAD(&e->flows);
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ if (family == AF_INET)
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ complete_all(&e->res_ready);
+ if (err) {
+ e->compl_result = err;
+ goto out_err;
+ }
+ e->compl_result = 1;
+
+attach_flow:
+ err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
+ tbl_time_before, out_index);
+ if (err)
+ goto out_err;
+
+ err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
+ if (err == -EOPNOTSUPP) {
+ /* If device doesn't support int port offload,
+ * redirect to uplink vport.
+ */
+ mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
+ err = 0;
+ } else if (err) {
+ goto out_err;
+ }
+
+ flow->encaps[out_index].e = e;
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ } else {
+ flow_flag_set(flow, SLOW);
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (e)
+ mlx5e_encap_put(priv, e);
+ return err;
+
+out_err_init:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ kfree(tun_info);
+ kfree(e);
+ return err;
+}
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+ struct mlx5_pkt_reformat_params reformat_params;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err = 0;
+
+ if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ reformat_params.size = sizeof(attr->eth);
+ reformat_params.data = &attr->eth;
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ &reformat_params,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}
+
+static int cmp_route_info(struct mlx5e_route_key *a,
+ struct mlx5e_route_key *b)
+{
+ if (a->ip_version == 4 && b->ip_version == 4)
+ return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
+ sizeof(a->endpoint_ip.v4));
+ else if (a->ip_version == 6 && b->ip_version == 6)
+ return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
+ sizeof(a->endpoint_ip.v6));
+ return 1;
+}
+
+static u32 hash_route_info(struct mlx5e_route_key *key)
+{
+ if (key->ip_version == 4)
+ return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
+ return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
+}
+
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r)
+{
+ WARN_ON(!list_empty(&r->decap_flows));
+ WARN_ON(!list_empty(&r->encap_entries));
+
+ kfree_rcu(r, rcu);
+}
+
+static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
+ if (!refcount_dec_and_test(&r->refcnt))
+ return;
+ hash_del_rcu(&r->hlist);
+ mlx5e_route_dealloc(priv, r);
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_route_key r_key;
+ struct mlx5e_route_entry *r;
+
+ hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
+ r_key = r->key;
+ if (!cmp_route_info(&r_key, key) &&
+ refcount_inc_not_zero(&r->refcnt))
+ return r;
+ }
+ return NULL;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get_create(struct mlx5e_priv *priv,
+ struct mlx5e_route_key *key,
+ int tunnel_dev_index,
+ unsigned long *route_tbl_change_time)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ struct mlx5e_route_entry *r;
+ u32 hash_key;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ hash_key = hash_route_info(key);
+ spin_lock_bh(&encap->route_lock);
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+ if (r) {
+ if (!mlx5e_route_entry_valid(r)) {
+ mlx5e_route_put_locked(priv, r);
+ return ERR_PTR(-EINVAL);
+ }
+ return r;
+ }
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ r->key = *key;
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+ r->tunnel_dev_index = tunnel_dev_index;
+ refcount_set(&r->refcnt, 1);
+ INIT_LIST_HEAD(&r->decap_flows);
+ INIT_LIST_HEAD(&r->encap_entries);
+
+ spin_lock_bh(&encap->route_lock);
+ *route_tbl_change_time = encap->route_tbl_last_update;
+ hash_add(encap->route_tbl, &r->hlist, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
+{
+ u32 hash_key = hash_route_info(key);
+ struct mlx5e_route_entry *r;
+
+ spin_lock_bh(&encap->route_lock);
+ encap->route_tbl_last_update = jiffies;
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+struct mlx5e_tc_fib_event_data {
+ struct work_struct work;
+ unsigned long event;
+ struct mlx5e_route_entry *r;
+ struct net_device *ul_dev;
+};
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work);
+static struct mlx5e_tc_fib_event_data *
+mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+
+ fib_work = kzalloc(sizeof(*fib_work), flags);
+ if (WARN_ON(!fib_work))
+ return NULL;
+
+ INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
+ fib_work->event = event;
+ fib_work->ul_dev = ul_dev;
+
+ return fib_work;
+}
+
+static int
+mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ unsigned long event)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct net_device *ul_dev;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ ul_dev = uplink_rpriv->netdev;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
+ if (!fib_work)
+ return -ENOMEM;
+
+ dev_hold(ul_dev);
+ refcount_inc(&r->refcnt);
+ fib_work->r = r;
+ queue_work(priv->wq, &fib_work->work);
+
+ return 0;
+}
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_before, tbl_time_after;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (!esw_attr->rx_tun_attr)
+ goto out;
+
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ tbl_time_after = tbl_time_before;
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
+ if (err || !esw_attr->rx_tun_attr->decap_vport)
+ goto out;
+
+ key.ip_version = attr->tun_ip_version;
+ if (key.ip_version == 4)
+ key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
+ else
+ key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
+ &tbl_time_after);
+ if (IS_ERR(r)) {
+ err = PTR_ERR(r);
+ goto out;
+ }
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ goto out;
+ }
+ }
+
+ flow->decap_route = r;
+ list_add(&flow->decap_routes, &r->decap_flows);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return 0;
+
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return err;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_after = tbl_time_before;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ unsigned short family;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+
+ if (family == AF_INET) {
+ key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
+ key.ip_version = 4;
+ } else if (family == AF_INET6) {
+ key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
+ key.ip_version = 6;
+ }
+
+ err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+ e->route_dev_ifindex, out_index);
+ if (err || !(esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+ return err;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
+ &tbl_time_after);
+ if (IS_ERR(r))
+ return PTR_ERR(r);
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ return err;
+ }
+ }
+
+ flow->encap_routes[out_index].r = r;
+ if (new_encap_entry)
+ list_add(&e->route_list, &r->encap_entries);
+ flow->encap_routes[out_index].index = out_index;
+ return 0;
+}
+
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_entry *r = flow->decap_route;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->decap_routes);
+ flow->decap_route = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index)
+{
+ struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e, *tmp;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ flow->encap_routes[out_index].r = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
+ list_del_init(&e->route_list);
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_attr *attr;
+
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+
+ if (flow_flag_test(flow, SLOW))
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ attr->modify_hdr = NULL;
+
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
+ }
+}
+
+static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
+ struct net_device *tunnel_dev,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
+ mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
+ mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
+ e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+
+ err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
+ e->out_dev, e->route_dev_ifindex,
+ flow->tmp_entry_index);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
+ continue;
+ }
+
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
+ err);
+ continue;
+ }
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ goto offload_to_slow_path;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ goto offload_to_slow_path;
+ }
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
+ if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ } else {
+offload_to_slow_path:
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ }
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ struct mlx5e_encap_entry *e;
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ list_for_each_entry(e, &r->encap_entries, route_list) {
+ LIST_HEAD(encap_flows);
+
+ mlx5e_take_all_encap_flows(e, &encap_flows);
+ if (list_empty(&encap_flows))
+ continue;
+
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_invalidate_encap(priv, e, &encap_flows);
+
+ if (!replace) {
+ list_splice(&encap_flows, flow_list);
+ continue;
+ }
+
+ mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
+ list_splice(&encap_flows, flow_list);
+ }
+
+ return 0;
+}
+
+static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, flow_list, tmp_list)
+ if (mlx5e_is_offloaded_flow(flow))
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+}
+
+static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
+ struct list_head *decap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, decap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ parse_attr = attr->parse_attr;
+ spec = &parse_attr->spec;
+ err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
+ err);
+ continue;
+ }
+
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ flow_flag_set(flow, OFFLOADED);
+ }
+ }
+}
+
+static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ LIST_HEAD(decap_flows);
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ mlx5e_take_all_route_decap_flows(r, &decap_flows);
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_unoffload_flow_list(priv, &decap_flows);
+ if (replace)
+ mlx5e_reoffload_decap(priv, &decap_flows);
+
+ list_splice(&decap_flows, flow_list);
+
+ return 0;
+}
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work)
+{
+ struct mlx5e_tc_fib_event_data *event_data =
+ container_of(work, struct mlx5e_tc_fib_event_data, work);
+ struct net_device *ul_dev = event_data->ul_dev;
+ struct mlx5e_priv *priv = netdev_priv(ul_dev);
+ struct mlx5e_route_entry *r = event_data->r;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+ bool replace;
+ int err;
+
+ /* sync with concurrent neigh updates */
+ rtnl_lock();
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
+
+ if (!mlx5e_route_entry_valid(r) && !replace)
+ goto out;
+
+ err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
+ err);
+
+ err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
+ err);
+
+ if (replace)
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ rtnl_unlock();
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ mlx5e_route_put(priv, event_data->r);
+ dev_put(event_data->ul_dev);
+ kfree(event_data);
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib_entry_notifier_info, info);
+ if (fen_info->fi->nh)
+ return NULL;
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->dst_len != 32)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ key.endpoint_ip.v4 = htonl(fen_info->dst);
+ key.ip_version = 4;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib6_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib6_entry_notifier_info, info);
+ fib_dev = fib6_info_nh_dev(fen_info->rt);
+ if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->rt->fib6_dst.plen != 128)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
+ sizeof(fen_info->rt->fib6_dst.addr));
+ key.ip_version = 6;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct fib_notifier_info *info = ptr;
+ struct mlx5e_tc_tun_encap *encap;
+ struct net_device *ul_dev;
+ struct mlx5e_priv *priv;
+
+ encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
+ priv = encap->priv;
+ ul_dev = priv->netdev;
+ priv = netdev_priv(ul_dev);
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ if (info->family == AF_INET)
+ fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
+ else if (info->family == AF_INET6)
+ fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
+ else
+ return NOTIFY_DONE;
+
+ if (!IS_ERR_OR_NULL(fib_work)) {
+ queue_work(priv->wq, &fib_work->work);
+ } else if (IS_ERR(fib_work)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
+ mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
+ PTR_ERR(fib_work));
+ }
+
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_DONE;
+}
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_tun_encap *encap;
+ int err;
+
+ encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
+ if (!encap)
+ return ERR_PTR(-ENOMEM);
+
+ encap->priv = priv;
+ encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
+ spin_lock_init(&encap->route_lock);
+ hash_init(encap->route_tbl);
+ err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
+ NULL, NULL);
+ if (err) {
+ kvfree(encap);
+ return ERR_PTR(err);
+ }
+
+ return encap;
+}
+
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
+{
+ if (!encap)
+ return;
+
+ unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
+ flush_workqueue(encap->priv->wq); /* flush fib event works */
+ kvfree(encap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
new file mode 100644
index 000000000..8ad273dde
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUN_ENCAP_H__
+#define __MLX5_EN_TC_TUN_ENCAP_H__
+
+#include "tc_priv.h"
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev);
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv);
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap);
+
+#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
new file mode 100644
index 000000000..054d80c4e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/geneve.h>
+#include "lib/geneve.h"
+#include "en/tc_tun.h"
+
+#define MLX5E_GENEVE_VER 0
+
+static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv)
+{
+ return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE);
+}
+
+static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) +
+ sizeof(struct genevehdr) +
+ e->tun_info->options_len;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* Currently we support only default GENEVE
+ * port, so udp dst port must match.
+ */
+ if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a GENEVE port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a GENEVE port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_geneve(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &geneve_tunnel;
+
+ /* Reformat type for GENEVE encap is similar to VXLAN:
+ * in both cases the HW adds in the same place a
+ * defined encapsulation header that the SW provides.
+ */
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+ vni[0] = (__force __u8)(tun_id >> 16);
+ vni[1] = (__force __u8)(tun_id >> 8);
+ vni[2] = (__force __u8)tun_id;
+#else
+ vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+ vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+ vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
+
+static int mlx5e_gen_ip_tunnel_header_geneve(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_info *tun_info = e->tun_info;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct genevehdr *geneveh;
+
+ geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr));
+
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_info->key.tp_dst;
+
+ memset(geneveh, 0, sizeof(*geneveh));
+ geneveh->ver = MLX5E_GENEVE_VER;
+ geneveh->opt_len = tun_info->options_len / 4;
+ geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM);
+ geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT);
+ mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni);
+ geneveh->proto_type = htons(ETH_P_TEB);
+
+ if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (!geneveh->opt_len)
+ return -EOPNOTSUPP;
+ ip_tunnel_info_opts_get(geneveh->options, tun_info);
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid));
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len);
+ u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ void *misc_c, *misc_v, *misc_3_c, *misc_3_v;
+ struct geneve_opt *option_key, *option_mask;
+ __be32 opt_data_key = 0, opt_data_mask = 0;
+ struct flow_match_enc_opts enc_opts;
+ int res = 0;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3);
+ misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
+ return 0;
+
+ flow_rule_match_enc_opts(rule, &enc_opts);
+
+ if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.geneve_tlv_option_0_data)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* make sure that we're talking about GENEVE options */
+
+ if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: option type is not GENEVE");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: option type is not GENEVE\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_opts.mask->len &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_opt_len)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options len is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it
+ * doesn't include the TLV option header. 'geneve_opt_len' is a total
+ * len of all the options, including the headers, also multiples of 4
+ * bytes. Len that comes from the dissector is in bytes.
+ */
+
+ if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported options len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported options len (len=%d)\n",
+ enc_opts.key->len);
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4);
+
+ /* we support matching on one option only, so just get it */
+ option_key = (struct geneve_opt *)&enc_opts.key->data[0];
+ option_mask = (struct geneve_opt *)&enc_opts.mask->data[0];
+
+ if (option_mask->opt_class == 0 && option_mask->type == 0 &&
+ !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4))
+ return 0;
+
+ if (option_key->length > max_tlv_option_data_len) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported option len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n",
+ option_key->length, option_mask->length);
+ return -EOPNOTSUPP;
+ }
+
+ /* data can't be all 0 - fail to offload such rule */
+ if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: can't match on 0 data field");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: can't match on 0 data field\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* add new GENEVE TLV options object */
+ res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key);
+ if (res) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: failed creating TLV opt object");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n",
+ be16_to_cpu(option_key->opt_class),
+ option_key->type, option_key->length);
+ return res;
+ }
+
+ /* In general, after creating the object, need to query it
+ * in order to check which option data to set in misc3.
+ * But we support only geneve_tlv_option_0_data, so no
+ * point querying at this stage.
+ */
+
+ memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4);
+ memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4);
+ MLX5_SET(fte_match_set_misc3, misc_3_v,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_key));
+ MLX5_SET(fte_match_set_misc3, misc_3_c,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.geneve_tlv_option_0_exist)) {
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct netlink_ext_ack *extack = f->common.extack;
+
+ /* match on OAM - packets with OAM bit on should NOT be offloaded */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0);
+
+ /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */
+
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_protocol_type)) {
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_parse_geneve_options(priv, spec, f);
+}
+
+static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a,
+ struct mlx5e_encap_key *b)
+{
+ struct ip_tunnel_info *a_info;
+ struct ip_tunnel_info *b_info;
+ bool a_has_opts, b_has_opts;
+
+ if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
+ return false;
+
+ a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
+ b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT);
+
+ /* keys are equal when both don't have any options attached */
+ if (!a_has_opts && !b_has_opts)
+ return true;
+
+ if (a_has_opts != b_has_opts)
+ return false;
+
+ /* geneve options stored in memory next to ip_tunnel_info struct */
+ a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
+ b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
+
+ return a_info->options_len == b_info->options_len &&
+ memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0;
+}
+
+struct mlx5e_tc_tunnel geneve_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_geneve,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve,
+ .parse_tunnel = mlx5e_tc_tun_parse_geneve,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_geneve,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
new file mode 100644
index 000000000..ada14f057
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/gre.h>
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e)
+{
+ return gre_calc_hlen(e->tun_info->key.tun_flags);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &gre_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_gretap(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ int hdr_len;
+
+ *ip_proto = IPPROTO_GRE;
+
+ /* the HW does not calculate GRE csum or sequences */
+ if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+ return -EOPNOTSUPP;
+
+ greh->protocol = htons(ETH_P_TEB);
+
+ /* GRE key */
+ hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e);
+ greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+ if (tun_key->tun_flags & TUNNEL_KEY) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+ *ptr = tun_id;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+
+ /* gre protocol */
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+ /* gre key */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid enc_keyid;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+ MLX5_SET(fte_match_set_misc, misc_c,
+ gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v,
+ gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel gre_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ .match_level = MLX5_MATCH_L3,
+ .can_offload = mlx5e_tc_tun_can_offload_gretap,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap,
+ .parse_udp_ports = NULL,
+ .parse_tunnel = mlx5e_tc_tun_parse_gretap,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
new file mode 100644
index 000000000..c5b1617d5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/bareudp.h>
+#include <net/mpls.h>
+#include "en/tc_tun.h"
+
+static bool can_offload(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2);
+}
+
+static int calc_hlen(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) + MPLS_HLEN;
+}
+
+static int init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &mplsoudp_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ return 0;
+}
+
+static int generate_ip_tun_hdr(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *r)
+{
+ const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ const struct mlx5e_mpls_info *mpls_info = &r->mpls_info;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct mpls_shim_hdr *mpls;
+
+ mpls = (struct mpls_shim_hdr *)(udp + 1);
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos);
+
+ return 0;
+}
+
+static int parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+}
+
+static int parse_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_match_mpls match;
+ void *misc2_c;
+ void *misc2_v;
+
+ if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
+ !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
+ return -EOPNOTSUPP;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return -EOPNOTSUPP;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
+ flow_rule_match_mpls(rule, &match);
+
+ /* Only support matching the first LSE */
+ if (match.mask->used_lses != 1)
+ return -EOPNOTSUPP;
+
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_label,
+ match.mask->ls[0].mpls_label);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_label,
+ match.key->ls[0].mpls_label);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_exp,
+ match.mask->ls[0].mpls_tc);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_exp, match.key->ls[0].mpls_tc);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.mask->ls[0].mpls_bos);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.key->ls[0].mpls_bos);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.mask->ls[0].mpls_ttl);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.key->ls[0].mpls_ttl);
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel mplsoudp_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = can_offload,
+ .calc_hlen = calc_hlen,
+ .init_encap_attr = init_encap_attr,
+ .generate_ip_tun_hdr = generate_ip_tun_hdr,
+ .parse_udp_ports = parse_udp_ports,
+ .parse_tunnel = parse_tunnel,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
new file mode 100644
index 000000000..1f62c702b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e)
+{
+ return VXLAN_HLEN;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* check the UDP destination port validity */
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan,
+ be16_to_cpu(enc_ports.key->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a VXLAN port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a VXLAN port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err = 0;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int dst_port = be16_to_cpu(e->tun_info->key.tp_dst);
+
+ e->tunnel = &vxlan_tunnel;
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vxlan udp dport was not registered with the HW");
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n",
+ dst_port);
+ return -EOPNOTSUPP;
+ }
+
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh;
+
+ if (tun_key->tun_flags & TUNNEL_VXLAN_OPT)
+ return -EOPNOTSUPP;
+ vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(tun_id);
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ /* match on VNI is required */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_vxlan_vni)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on VXLAN VNI is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on VXLAN VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(enc_keyid.key->keyid));
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev)
+{
+ const struct vxlan_dev *vxlan = netdev_priv(mirred_dev);
+ const struct vxlan_rdst *dst = &vxlan->default_dst;
+
+ return dst->remote_ifindex;
+}
+
+struct mlx5e_tc_tunnel vxlan_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_vxlan,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan,
+ .parse_tunnel = mlx5e_tc_tun_parse_vxlan,
+ .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic,
+ .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
new file mode 100644
index 000000000..d4239e3b3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "tir.h"
+#include "params.h"
+#include <linux/mlx5/transobj.h>
+
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
+
+/* max() doesn't work inside square brackets. */
+#define MLX5E_TIR_CMD_IN_SZ_DW ( \
+ MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? \
+ MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \
+)
+
+struct mlx5e_tir_builder {
+ u32 in[MLX5E_TIR_CMD_IN_SZ_DW];
+ bool modify;
+};
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify)
+{
+ struct mlx5e_tir_builder *builder;
+
+ builder = kvzalloc(sizeof(*builder), GFP_KERNEL);
+ builder->modify = modify;
+
+ return builder;
+}
+
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder)
+{
+ kvfree(builder);
+}
+
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder)
+{
+ memset(builder->in, 0, sizeof(builder->in));
+}
+
+static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder)
+{
+ if (builder->modify)
+ return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx);
+ return MLX5_ADDR_OF(create_tir_in, builder->in, ctx);
+}
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
+ MLX5_SET(tirc, tirc, inline_rqn, rqn);
+}
+
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, rqtn);
+ MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
+}
+
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+ const unsigned int rough_max_l2_l3_hdr_sz = 256;
+
+ if (builder->modify)
+ MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
+
+ switch (pkt_merge_param->type) {
+ case MLX5E_PACKET_MERGE_LRO:
+ MLX5_SET(tirc, tirc, packet_merge_mask,
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+ (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5e_hfunc_to_hw(u8 hfunc)
+{
+ switch (hfunc) {
+ case ETH_RSS_HASH_TOP:
+ return MLX5_RX_HASH_FN_TOEPLITZ;
+ case ETH_RSS_HASH_XOR:
+ return MLX5_RX_HASH_FN_INVERTED_XOR8;
+ default:
+ return MLX5_RX_HASH_FN_NONE;
+ }
+}
+
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+ bool inner)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+ void *hfso;
+
+ if (builder->modify)
+ MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1);
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc));
+ if (rss_hash->hfunc == ETH_RSS_HASH_TOP) {
+ const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, rss_hash->toeplitz_hash_key, len);
+ }
+
+ if (inner)
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
+ else
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields, rss_tt->rx_hash_fields);
+}
+
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+}
+
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder)
+{
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+ WARN_ON(builder->modify);
+
+ MLX5_SET(tirc, tirc, tls_en, 1);
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST |
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST);
+}
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+ struct mlx5_core_dev *mdev, bool reg)
+{
+ int err;
+
+ tir->mdev = mdev;
+
+ err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn);
+ if (err)
+ return err;
+
+ if (reg) {
+ struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+ mutex_lock(&res->td.list_lock);
+ list_add(&tir->list, &res->td.tirs_list);
+ mutex_unlock(&res->td.list_lock);
+ } else {
+ INIT_LIST_HEAD(&tir->list);
+ }
+
+ return 0;
+}
+
+void mlx5e_tir_destroy(struct mlx5e_tir *tir)
+{
+ struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+ /* Skip mutex if list_del is no-op (the TIR wasn't registered in the
+ * list). list_empty will never return true for an item of tirs_list,
+ * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency
+ * of the list->next value.
+ */
+ if (!list_empty(&tir->list)) {
+ mutex_lock(&res->td.list_lock);
+ list_del(&tir->list);
+ mutex_unlock(&res->td.list_lock);
+ }
+
+ mlx5_core_destroy_tir(tir->mdev, tir->tirn);
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder)
+{
+ return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
new file mode 100644
index 000000000..857a84bcd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_TIR_H__
+#define __MLX5_EN_TIR_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_rss_params_hash {
+ u8 hfunc;
+ u8 toeplitz_hash_key[40];
+};
+
+struct mlx5e_rss_params_traffic_type {
+ u8 l3_prot_type;
+ u8 l4_prot_type;
+ u32 rx_hash_fields;
+};
+
+struct mlx5e_tir_builder;
+struct mlx5e_packet_merge_param;
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support);
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param);
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+ bool inner);
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder);
+
+struct mlx5_core_dev;
+
+struct mlx5e_tir {
+ struct mlx5_core_dev *mdev;
+ u32 tirn;
+ struct list_head list;
+};
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+ struct mlx5_core_dev *mdev, bool reg);
+void mlx5e_tir_destroy(struct mlx5e_tir *tir);
+
+static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir)
+{
+ return tir->tirn;
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder);
+
+#endif /* __MLX5_EN_TIR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
new file mode 100644
index 000000000..201ac7dd3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies */
+
+#include <net/page_pool.h>
+#include "en/txrx.h"
+#include "en/params.h"
+#include "en/trap.h"
+
+static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_trap *trap_ctx = container_of(napi, struct mlx5e_trap, napi);
+ struct mlx5e_ch_stats *ch_stats = trap_ctx->stats;
+ struct mlx5e_rq *rq = &trap_ctx->rq;
+ bool busy = false;
+ int work_done = 0;
+
+ rcu_read_lock();
+
+ ch_stats->poll++;
+
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ busy |= work_done == budget;
+ busy |= rq->post_wqes(rq);
+
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ goto out;
+
+ mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
+ return work_done;
+}
+
+static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = t->mdev;
+ struct mlx5e_priv *priv = t->priv;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = t->pdev;
+ rq->netdev = priv->netdev;
+ rq->priv = priv;
+ rq->clock = &mdev->clock;
+ rq->tstamp = &priv->tstamp;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &priv->trap_stats.rq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+ mlx5e_rq_set_trap_handlers(rq, params);
+}
+
+static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t)
+{
+ struct mlx5e_rq_param *rq_param = &t->rq_param;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder trap_moder = {};
+ struct mlx5e_rq *rq = &t->rq;
+ int node;
+ int err;
+
+ node = dev_to_node(mdev->device);
+
+ ccp.node = node;
+ ccp.ch_stats = t->stats;
+ ccp.napi = &t->napi;
+ ccp.ix = 0;
+ err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq);
+ if (err)
+ return err;
+
+ mlx5e_init_trap_rq(t, &t->params, rq);
+ err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq);
+ if (err)
+ goto err_destroy_cq;
+
+ return 0;
+
+err_destroy_cq:
+ mlx5e_close_cq(&rq->cq);
+
+ return err;
+}
+
+static void mlx5e_close_trap_rq(struct mlx5e_rq *rq)
+{
+ mlx5e_close_rq(rq);
+ mlx5e_close_cq(&rq->cq);
+}
+
+static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
+ u32 rqn)
+{
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn);
+ err = mlx5e_tir_init(tir, builder, mdev, true);
+
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
+ int max_mtu, u16 q_counter,
+ struct mlx5e_trap *t)
+{
+ struct mlx5e_params *params = &t->params;
+
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ mlx5e_init_rq_type_params(mdev, params);
+ params->sw_mtu = max_mtu;
+ mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param);
+}
+
+static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
+{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0));
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_trap *t;
+ int err;
+
+ t = kvzalloc_node(sizeof(*t), GFP_KERNEL, cpu_to_node(cpu));
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t);
+
+ t->priv = priv;
+ t->mdev = priv->mdev;
+ t->tstamp = &priv->tstamp;
+ t->pdev = mlx5_core_dma_dev(priv->mdev);
+ t->netdev = priv->netdev;
+ t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
+ t->stats = &priv->trap_stats.ch;
+
+ netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll);
+
+ err = mlx5e_open_trap_rq(priv, t);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ err = mlx5e_create_trap_direct_rq_tir(t->mdev, &t->tir, t->rq.rqn);
+ if (err)
+ goto err_close_trap_rq;
+
+ return t;
+
+err_close_trap_rq:
+ mlx5e_close_trap_rq(&t->rq);
+err_napi_del:
+ netif_napi_del(&t->napi);
+ kvfree(t);
+ return ERR_PTR(err);
+}
+
+void mlx5e_close_trap(struct mlx5e_trap *trap)
+{
+ mlx5e_tir_destroy(&trap->tir);
+ mlx5e_close_trap_rq(&trap->rq);
+ netif_napi_del(&trap->napi);
+ kvfree(trap);
+}
+
+static void mlx5e_activate_trap(struct mlx5e_trap *trap)
+{
+ napi_enable(&trap->napi);
+ mlx5e_activate_rq(&trap->rq);
+ mlx5e_trigger_napi_sched(&trap->napi);
+}
+
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap = priv->en_trap;
+
+ mlx5e_deactivate_rq(&trap->rq);
+ napi_disable(&trap->napi);
+}
+
+static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap;
+
+ trap = mlx5e_open_trap(priv);
+ if (IS_ERR(trap))
+ goto out;
+
+ mlx5e_activate_trap(trap);
+out:
+ return trap;
+}
+
+static void mlx5e_del_trap_queue(struct mlx5e_priv *priv)
+{
+ mlx5e_deactivate_trap(priv);
+ mlx5e_close_trap(priv->en_trap);
+ priv->en_trap = NULL;
+}
+
+static int mlx5e_trap_get_tirn(struct mlx5e_trap *en_trap)
+{
+ return en_trap->tir.tirn;
+}
+
+static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id)
+{
+ bool open_queue = !priv->en_trap;
+ struct mlx5e_trap *trap;
+ int err;
+
+ if (open_queue) {
+ trap = mlx5e_add_trap_queue(priv);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ priv->en_trap = trap;
+ }
+
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ err = -EINVAL;
+ goto err_out;
+ }
+ return 0;
+
+err_out:
+ if (open_queue)
+ mlx5e_del_trap_queue(priv);
+ return err;
+}
+
+static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id)
+{
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ mlx5e_remove_vlan_trap(priv->fs);
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ mlx5e_remove_mac_trap(priv->fs);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ return -EINVAL;
+ }
+ if (priv->en_trap && !mlx5_devlink_trap_get_num_active(priv->mdev))
+ mlx5e_del_trap_queue(priv);
+
+ return 0;
+}
+
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx)
+{
+ int err = 0;
+
+ /* Traps are unarmed when interface is down, no need to update
+ * them. The configuration is saved in the core driver,
+ * queried and applied upon interface up operation in
+ * mlx5e_open_locked().
+ */
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ switch (trap_ctx->action) {
+ case DEVLINK_TRAP_ACTION_TRAP:
+ err = mlx5e_handle_action_trap(priv, trap_ctx->id);
+ break;
+ case DEVLINK_TRAP_ACTION_DROP:
+ err = mlx5e_handle_action_drop(priv, trap_ctx->id);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unsupported action %d\n", __func__,
+ trap_ctx->action);
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static int mlx5e_apply_trap(struct mlx5e_priv *priv, int trap_id, bool enable)
+{
+ enum devlink_trap_action action;
+ int err;
+
+ err = mlx5_devlink_traps_get_action(priv->mdev, trap_id, &action);
+ if (err)
+ return err;
+ if (action == DEVLINK_TRAP_ACTION_TRAP)
+ err = enable ? mlx5e_handle_action_trap(priv, trap_id) :
+ mlx5e_handle_action_drop(priv, trap_id);
+ return err;
+}
+
+static const int mlx5e_traps_arr[] = {
+ DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER,
+};
+
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5e_traps_arr); i++) {
+ err = mlx5e_apply_trap(priv, mlx5e_traps_arr[i], enable);
+ if (err)
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
new file mode 100644
index 000000000..aa3f17658
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies */
+
+#ifndef __MLX5E_TRAP_H__
+#define __MLX5E_TRAP_H__
+
+#include "../en.h"
+#include "../devlink.h"
+
+struct mlx5e_trap {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_tir tir;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+
+ /* data path - accessed per napi poll */
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
+
+ struct mlx5e_params params;
+ struct mlx5e_rq_param rq_param;
+};
+
+void mlx5e_close_trap(struct mlx5e_trap *trap);
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv);
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx);
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
new file mode 100644
index 000000000..344245c01
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -0,0 +1,494 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TXRX_H___
+#define __MLX5_EN_TXRX_H___
+
+#include "en.h"
+#include <linux/indirect_call_wrapper.h>
+
+#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+/* IPSEC inline data includes:
+ * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for
+ * next header.
+ * 2. ESP authentication data: 16 bytes for ICV.
+ */
+#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \
+ 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS)
+
+/* 366 should be big enough to cover all L2, L3 and L4 headers with possible
+ * encapsulations.
+ */
+#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \
+ MLX5_SEND_WQE_DS)
+
+/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */
+#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \
+ MLX5E_MAX_TX_INLINE_DS + \
+ MLX5E_MAX_TX_IPSEC_DS + \
+ MAX_SKB_FRAGS + 1, \
+ MLX5_SEND_WQEBB_NUM_DS)
+
+#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
+
+static inline
+ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts)
+{
+ return INDIRECT_CALL_2(func, mlx5_real_time_cyc2time, mlx5_timecounter_cyc2time,
+ clock, cqe_ts);
+}
+
+enum mlx5e_icosq_wqe_type {
+ MLX5E_ICOSQ_WQE_NOP,
+ MLX5E_ICOSQ_WQE_UMR_RX,
+ MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
+#ifdef CONFIG_MLX5_EN_TLS
+ MLX5E_ICOSQ_WQE_UMR_TLS,
+ MLX5E_ICOSQ_WQE_SET_PSV_TLS,
+ MLX5E_ICOSQ_WQE_GET_PSV_TLS,
+#endif
+};
+
+/* General */
+static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb)
+{
+ return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST;
+}
+
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
+int mlx5e_napi_poll(struct napi_struct *napi, int budget);
+int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
+
+/* RX */
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
+
+/* TX */
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
+
+static inline bool
+mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
+{
+ return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
+}
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+ return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
+
+static inline void *mlx5e_fetch_wqe(struct mlx5_wq_cyc *wq, u16 pi, size_t wqe_size)
+{
+ void *wqe;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(wqe, 0, wqe_size);
+
+ return wqe;
+}
+
+#define MLX5E_TX_FETCH_WQE(sq, pi) \
+ ((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_tx_wqe)))
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+ cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ (*pc)++;
+
+ return wqe;
+}
+
+struct mlx5e_tx_wqe_info {
+ struct sk_buff *skb;
+ u32 num_bytes;
+ u8 num_wqebbs;
+ u8 num_dma;
+ u8 num_fifo_pkts;
+#ifdef CONFIG_MLX5_EN_TLS
+ struct page *resync_dump_frag_page;
+#endif
+};
+
+static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_tx_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
+
+static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
+}
+
+struct mlx5e_shampo_umr {
+ u16 len;
+};
+
+struct mlx5e_icosq_wqe_info {
+ u8 wqe_type;
+ u8 num_wqebbs;
+
+ /* Auxiliary data for different wqe types. */
+ union {
+ struct {
+ struct mlx5e_rq *rq;
+ } umr;
+ struct mlx5e_shampo_umr shampo;
+#ifdef CONFIG_MLX5_EN_TLS
+ struct {
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+ } tls_set_params;
+ struct {
+ struct mlx5e_ktls_rx_resync_buf *buf;
+ } tls_get_params;
+#endif
+ };
+};
+
+void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq);
+
+static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_icosq_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_NOP,
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+static inline void
+mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ *wq->db = cpu_to_be32(pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, uar_map);
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+static inline struct mlx5e_sq_dma *
+mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+ return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void
+mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
+ enum mlx5e_dma_map_type map_type)
+{
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+ dma->addr = addr;
+ dma->size = size;
+ dma->type = map_type;
+}
+
+static inline
+struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i)
+{
+ return &fifo->fifo[i & fifo->mask];
+}
+
+static inline
+void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
+{
+ struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
+
+ *skb_item = skb;
+}
+
+static inline
+struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
+{
+ WARN_ON_ONCE(*fifo->pc == *fifo->cc);
+
+ return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
+}
+
+static inline void
+mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
+{
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+ }
+}
+
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
+
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
+{
+ return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+}
+
+static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
+{
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ mlx5_wq_ll_reset(&rq->mpwqe.wq);
+ rq->mpwqe.actual_wq_head = 0;
+ } else {
+ mlx5_wq_cyc_reset(&rq->wqe.wq);
+ }
+}
+
+static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn,
+ struct mlx5_err_cqe *err_cqe)
+{
+ struct mlx5_cqwq *wq = &cq->wq;
+ u32 ci;
+
+ ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+
+ netdev_err(cq->netdev,
+ "Error cqe on cqn 0x%x, ci 0x%x, qn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ cq->mcq.cqn, ci, qn,
+ get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
+ mlx5_dump_err_cqe(cq->mdev, err_cqe);
+}
+
+static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+ }
+}
+
+static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return rq->mpwqe.wq.cur_sz;
+ default:
+ return rq->wqe.wq.cur_sz;
+ }
+}
+
+static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_head(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_head(&rq->wqe.wq);
+ }
+}
+
+static inline u16 mlx5e_rqwq_get_wqe_counter(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_counter(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_counter(&rq->wqe.wq);
+ }
+}
+
+/* SW parser related functions */
+
+struct mlx5e_swp_spec {
+ __be16 l3_proto;
+ u8 l4_proto;
+ u8 is_tun;
+ __be16 tun_l3_proto;
+ u8 tun_l4_proto;
+};
+
+static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg)
+{
+ /* SWP offsets are in 2-bytes words */
+ eseg->swp_outer_l3_offset += VLAN_HLEN / 2;
+ eseg->swp_outer_l4_offset += VLAN_HLEN / 2;
+ eseg->swp_inner_l3_offset += VLAN_HLEN / 2;
+ eseg->swp_inner_l4_offset += VLAN_HLEN / 2;
+}
+
+static inline void
+mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
+ struct mlx5e_swp_spec *swp_spec)
+{
+ /* SWP offsets are in 2-bytes words */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+ if (swp_spec->l4_proto) {
+ eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
+ if (swp_spec->l4_proto == IPPROTO_UDP)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+ }
+
+ if (swp_spec->is_tun) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
+ eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ }
+ switch (swp_spec->tun_l4_proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ }
+}
+
+#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)
+
+static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size)
+{
+ WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev));
+
+ /* A WQE must not cross the page boundary, hence two conditions:
+ * 1. Its size must not exceed the page size.
+ * 2. If the WQE size is X, and the space remaining in a page is less
+ * than X, this space needs to be padded with NOPs. So, one WQE of
+ * size X may require up to X-1 WQEBBs of padding, which makes the
+ * stop room of X-1 + X.
+ * WQE size is also limited by the hardware limit.
+ */
+ WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev),
+ "wqe_size %u is greater than max SQ WQEBBs %u",
+ wqe_size, mlx5e_get_max_sq_wqebbs(mdev));
+
+ return MLX5E_STOP_ROOM(wqe_size);
+}
+
+static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev)
+{
+ return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev));
+}
+
+static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev)
+{
+ u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+
+ return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs);
+}
+
+static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
+{
+ u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size);
+
+ return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
+}
+
+static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
+{
+ size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+
+ return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
new file mode 100644
index 000000000..20507ef2f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include "en/xdp.h"
+#include "en/params.h"
+
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
+{
+ int hr = mlx5e_get_linear_rq_headroom(params, xsk);
+
+ /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+ * The condition checked in mlx5e_rx_is_linear_skb is:
+ * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+ * (Note that hw_mtu == sw_mtu + hard_mtu.)
+ * What is returned from this function is:
+ * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+ * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+ * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+ * because both PAGE_SIZE and S are already aligned. Any number greater
+ * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+ * so max_mtu is the maximum MTU allowed.
+ */
+
+ return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
+static inline bool
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
+ struct page *page, struct xdp_buff *xdp)
+{
+ struct skb_shared_info *sinfo = NULL;
+ struct mlx5e_xmit_data xdptxd;
+ struct mlx5e_xdp_info xdpi;
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ int i;
+
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf))
+ return false;
+
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
+ /* The xdp_buff was in the UMEM and was copied into a newly
+ * allocated page. The UMEM page was returned via the ZCA, and
+ * this new page has to be mapped at this point and has to be
+ * unmapped and returned via xdp_return_frame on completion.
+ */
+
+ /* Prevent double recycling of the UMEM page. Even in case this
+ * function returns false, the xdp_buff shouldn't be recycled,
+ * as it was already done in xdp_convert_zc_to_xdp_frame.
+ */
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+
+ dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(sq->pdev, dma_addr)) {
+ xdp_return_frame(xdpf);
+ return false;
+ }
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = dma_addr;
+
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+ return false;
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ return true;
+ }
+
+ /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
+ * that points to the same memory region as the original xdp_buff. It
+ * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
+ * mode.
+ */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ xdpi.page.rq = rq;
+
+ dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+ u32 len;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+ len = skb_frag_size(frag);
+ dma_sync_single_for_device(sq->pdev, addr, len,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+
+ xdptxd.dma_addr = dma_addr;
+
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+ return false;
+
+ xdpi.page.page = page;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ xdpi.page.page = skb_frag_page(frag);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ }
+ }
+
+ return true;
+}
+
+/* returns true if packet was consumed by xdp */
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp)
+{
+ u32 act;
+ int err;
+
+ act = bpf_prog_run_xdp(prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ return false;
+ case XDP_TX:
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp)))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+ return true;
+ case XDP_REDIRECT:
+ /* When XDP enabled then page-refcnt==1 here */
+ err = xdp_do_redirect(rq->netdev, xdp, prog);
+ if (unlikely(err))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
+ __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
+ if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
+ mlx5e_page_dma_unmap(rq, page);
+ rq->stats->xdp_redirect++;
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+xdp_abort:
+ trace_xdp_exception(rq->netdev, prog, act);
+ fallthrough;
+ case XDP_DROP:
+ rq->stats->xdp_drop++;
+ return true;
+ }
+}
+
+static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_xdp_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = 1,
+ .num_pkts = 0,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nops += contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ net_prefetchw(wqe->data);
+
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
+ };
+
+ stats->mpwqe++;
+}
+
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
+ u16 ds_count = session->ds_count;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
+ wi->num_pkts = session->pkt_count;
+
+ sq->pc += wi->num_wqebbs;
+
+ sq->doorbell_cseg = cseg;
+
+ session->wqe = NULL; /* Close session */
+}
+
+enum {
+ MLX5E_XDP_CHECK_OK = 1,
+ MLX5E_XDP_CHECK_START_MPWQE = 2,
+};
+
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!sq->mpwqe.wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ sq->stop_room))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_START_MPWQE;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result);
+
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ if (unlikely(sinfo)) {
+ /* MPWQE is enabled, but a multi-buffer packet is queued for
+ * transmission. MPWQE can't send fragmented packets, so close
+ * the current session and fall back to a regular WQE.
+ */
+ if (unlikely(sq->mpwqe.wqe))
+ mlx5e_xdp_mpwqe_complete(sq);
+ return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+ }
+
+ if (unlikely(xdptxd->len > sq->hw_mtu)) {
+ stats->err++;
+ return false;
+ }
+
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
+ if (unlikely(check_result < 0))
+ return false;
+
+ if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
+ /* Start the session when nothing can fail, so it's guaranteed
+ * that if there is an active session, it has at least one dseg,
+ * and it's safe to complete it at any time.
+ */
+ mlx5e_xdp_mpwqe_session_start(sq);
+ }
+
+ mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
+
+ if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ stats->xmit++;
+ return true;
+}
+
+static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
+}
+
+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5e_tx_wqe *wqe;
+
+ dma_addr_t dma_addr = xdptxd->dma_addr;
+ u32 dma_len = xdptxd->len;
+ u16 ds_cnt, inline_hdr_sz;
+ u8 num_wqebbs = 1;
+ int num_frags = 0;
+ u16 pi;
+
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
+ stats->err++;
+ return false;
+ }
+
+ ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
+ ds_cnt++;
+
+ /* check_result must be 0 if sinfo is passed. */
+ if (!check_result) {
+ int stop_room = 1;
+
+ if (unlikely(sinfo)) {
+ ds_cnt += sinfo->nr_frags;
+ num_frags = sinfo->nr_frags;
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
+ * enough to hold all fragments.
+ */
+ stop_room = MLX5E_STOP_ROOM(num_wqebbs);
+ }
+
+ check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
+ }
+ if (unlikely(check_result < 0))
+ return false;
+
+ pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ net_prefetchw(wqe);
+
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ inline_hdr_sz = 0;
+
+ /* copy the inline part if required */
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
+ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
+ dma_len -= MLX5E_XDP_MIN_INLINE;
+ dma_addr += MLX5E_XDP_MIN_INLINE;
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ dseg++;
+ }
+
+ /* write the dma part */
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+
+ if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
+ u8 num_pkts = 1 + num_frags;
+ int i;
+
+ memset(&cseg->trailer, 0, sizeof(cseg->trailer));
+ memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
+
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ dseg->lkey = sq->mkey_be;
+
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+
+ dseg++;
+ dseg->addr = cpu_to_be64(addr);
+ dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+ dseg->lkey = sq->mkey_be;
+ }
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_pkts = num_pkts,
+ };
+
+ sq->pc += num_wqebbs;
+ } else {
+ cseg->fm_ce_se = 0;
+
+ sq->pc++;
+ }
+
+ sq->doorbell_cseg = cseg;
+
+ stats->xmit++;
+ return true;
+}
+
+static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_wqe_info *wi,
+ u32 *xsk_frames,
+ bool recycle,
+ struct xdp_frame_bulk *bq)
+{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ u16 i;
+
+ for (i = 0; i < wi->num_pkts; i++) {
+ struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ switch (xdpi.mode) {
+ case MLX5E_XDP_XMIT_MODE_FRAME:
+ /* XDP_TX from the XSK RQ and XDP_REDIRECT */
+ dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
+ xdpi.frame.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
+ break;
+ case MLX5E_XDP_XMIT_MODE_PAGE:
+ /* XDP_TX from the regular RQ */
+ mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
+ break;
+ case MLX5E_XDP_XMIT_MODE_XSK:
+ /* AF_XDP send */
+ (*xsk_frames)++;
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ }
+ }
+}
+
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+{
+ struct xdp_frame_bulk bq;
+ struct mlx5e_xdpsq *sq;
+ struct mlx5_cqe64 *cqe;
+ u32 xsk_frames = 0;
+ u16 sqcc;
+ int i;
+
+ xdp_frame_bulk_init(&bq);
+
+ sq = container_of(cq, struct mlx5e_xdpsq, cq);
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return false;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ i = 0;
+ do {
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 wqe_counter, ci;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ do {
+ last_wqe = (sqcc == wqe_counter);
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+
+ sqcc += wi->num_wqebbs;
+
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
+ } while (!last_wqe);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(sq->channel->netdev,
+ "Bad OP in XDPSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+ (struct mlx5_err_cqe *)cqe);
+ mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+ }
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ xdp_flush_frame_bulk(&bq);
+
+ if (xsk_frames)
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
+
+ sq->stats->cqes += i;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->cc = sqcc;
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+{
+ struct xdp_frame_bulk bq;
+ u32 xsk_frames = 0;
+
+ xdp_frame_bulk_init(&bq);
+
+ rcu_read_lock(); /* need for xdp_return_frame_bulk */
+
+ while (sq->cc != sq->pc) {
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ wi = &sq->db.wqe_info[ci];
+
+ sq->cc += wi->num_wqebbs;
+
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
+ }
+
+ xdp_flush_frame_bulk(&bq);
+ rcu_read_unlock();
+
+ if (xsk_frames)
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
+}
+
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_xdpsq *sq;
+ int nxmit = 0;
+ int sq_num;
+ int i;
+
+ /* this flag is sufficient, no need to test internal sq state */
+ if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ sq_num = smp_processor_id();
+
+ if (unlikely(sq_num >= priv->channels.num))
+ return -ENXIO;
+
+ sq = &priv->channels.c[sq_num]->xdpsq;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct mlx5e_xmit_data xdptxd;
+ struct mlx5e_xdp_info xdpi;
+ bool ret;
+
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+ xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
+ xdptxd.len, DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
+ break;
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = xdptxd.dma_addr;
+
+ ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
+ if (unlikely(!ret)) {
+ dma_unmap_single(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_TO_DEVICE);
+ break;
+ }
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ nxmit++;
+ }
+
+ if (flags & XDP_XMIT_FLUSH) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+ mlx5e_xmit_xdp_doorbell(sq);
+ }
+
+ return nxmit;
+}
+
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
+{
+ struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
+
+ if (xdpsq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(xdpsq);
+
+ mlx5e_xmit_xdp_doorbell(xdpsq);
+
+ if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
+ xdp_do_flush_map();
+ __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
+ }
+}
+
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
+{
+ sq->xmit_xdp_frame_check = is_mpw ?
+ mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
+ sq->xmit_xdp_frame = is_mpw ?
+ mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
new file mode 100644
index 000000000..bc2d9034a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_XDP_H__
+#define __MLX5_EN_XDP_H__
+
+#include <linux/indirect_call_wrapper.h>
+
+#include "en.h"
+#include "en/txrx.h"
+
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
+ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
+ sizeof(struct mlx5_wqe_inline_seg))
+
+struct mlx5e_xsk_param;
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp);
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo,
+ int check_result));
+INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo,
+ int check_result));
+INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
+INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
+
+static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+
+ if (priv->channels.params.xdp_prog)
+ set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
+}
+
+static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
+{
+ if (priv->channels.params.xdp_prog)
+ clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
+
+ clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+ /* Let other device's napi(s) and XSK wakeups see our new state. */
+ synchronize_net();
+}
+
+static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
+}
+
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
+{
+ if (sq->doorbell_cseg) {
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ sq->doorbell_cseg = NULL;
+ }
+}
+
+/* Enable inline WQEs to shift some load from a congested HCA (HW) to
+ * a less congested cpu (SW).
+ */
+static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
+{
+ u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
+
+#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
+#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
+
+ if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
+ return false;
+
+ if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
+ return true;
+
+ return cur;
+}
+
+static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
+{
+ if (session->inline_on)
+ return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
+ max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+
+ return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
+}
+
+struct mlx5e_xdp_wqe_info {
+ u8 num_wqebbs;
+ u8 num_pkts;
+};
+
+static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xmit_data *xdptxd,
+ struct mlx5e_xdpsq_stats *stats)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg =
+ (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+ u32 dma_len = xdptxd->len;
+
+ session->pkt_count++;
+ session->bytes_count += dma_len;
+
+ if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
+ struct mlx5_wqe_inline_seg *inline_dseg =
+ (struct mlx5_wqe_inline_seg *)dseg;
+ u16 ds_len = sizeof(*inline_dseg) + dma_len;
+ u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS);
+
+ inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
+ memcpy(inline_dseg->data, xdptxd->data, dma_len);
+
+ session->ds_count += ds_cnt;
+ stats->inlnw++;
+ return;
+ }
+
+ dseg->addr = cpu_to_be64(xdptxd->dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+ session->ds_count++;
+}
+
+static inline void
+mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
+ struct mlx5e_xdp_info *xi)
+{
+ u32 i = (*fifo->pc)++ & fifo->mask;
+
+ fifo->xi[i] = *xi;
+}
+
+static inline struct mlx5e_xdp_info
+mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
+{
+ return fifo->xi[(*fifo->cc)++ & fifo->mask];
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
new file mode 100644
index 000000000..ebada0c5a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <net/xdp_sock_drv.h>
+#include "pool.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ struct device *dev = mlx5_core_dma_dev(priv->mdev);
+
+ return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
+{
+ if (!xsk->pools) {
+ xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->pools), GFP_KERNEL);
+ if (unlikely(!xsk->pools))
+ return -ENOMEM;
+ }
+
+ xsk->refcnt++;
+ xsk->ever_used = true;
+
+ return 0;
+}
+
+static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk)
+{
+ if (!--xsk->refcnt) {
+ kfree(xsk->pools);
+ xsk->pools = NULL;
+ }
+}
+
+static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix)
+{
+ int err;
+
+ err = mlx5e_xsk_get_pools(xsk);
+ if (unlikely(err))
+ return err;
+
+ xsk->pools[ix] = pool;
+ return 0;
+}
+
+static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix)
+{
+ xsk->pools[ix] = NULL;
+
+ mlx5e_xsk_put_pools(xsk);
+}
+
+static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool)
+{
+ return xsk_pool_get_headroom(pool) <= 0xffff &&
+ xsk_pool_get_chunk_size(pool) <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk)
+{
+ xsk->headroom = xsk_pool_get_headroom(pool);
+ xsk->chunk_size = xsk_pool_get_chunk_size(pool);
+ xsk->unaligned = pool->unaligned;
+}
+
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool, u16 ix)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ int err;
+
+ if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix)))
+ return -EBUSY;
+
+ if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
+ return -EINVAL;
+
+ err = mlx5e_xsk_map_pool(priv, pool);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix);
+ if (unlikely(err))
+ goto err_unmap_pool;
+
+ mlx5e_build_xsk_param(pool, &xsk);
+
+ if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
+ const char *recommendation = is_power_of_2(xsk.chunk_size) ?
+ "Upgrade firmware" : "Disable striding RQ";
+
+ mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n",
+ xsk.chunk_size, recommendation);
+ }
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ /* XSK objects will be created on open. */
+ goto validate_closed;
+ }
+
+ if (!params->xdp_prog) {
+ /* XSK objects will be created when an XDP program is set,
+ * and the channels are reopened.
+ */
+ goto validate_closed;
+ }
+
+ c = priv->channels.c[ix];
+
+ err = mlx5e_open_xsk(priv, params, &xsk, pool, c);
+ if (unlikely(err))
+ goto err_remove_pool;
+
+ mlx5e_activate_xsk(c);
+ mlx5e_trigger_napi_icosq(c);
+
+ /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+ * any Fill Ring entries at the setup stage.
+ */
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true);
+
+ mlx5e_deactivate_rq(&c->rq);
+ mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY);
+
+ return 0;
+
+err_remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+
+err_unmap_pool:
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return err;
+
+validate_closed:
+ /* Check the configuration in advance, rather than fail at a later stage
+ * (in mlx5e_xdp_set or on open) and end up with no channels.
+ */
+ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ err = -EINVAL;
+ goto err_remove_pool;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+ struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params,
+ &priv->xsk, ix);
+ struct mlx5e_channel *c;
+
+ if (unlikely(!pool))
+ return -EINVAL;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto remove_pool;
+
+ /* XSK RQ and SQ are only created if XDP program is set. */
+ if (!priv->channels.params.xdp_prog)
+ goto remove_pool;
+
+ c = priv->channels.c[ix];
+
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_icosq(c);
+ mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT);
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);
+
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return 0;
+}
+
+static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool,
+ u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_enable_locked(priv, pool, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_disable_locked(priv, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+
+ if (unlikely(qid >= params->num_channels))
+ return -EINVAL;
+
+ return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) :
+ mlx5e_xsk_disable_pool(priv, qid);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
new file mode 100644
index 000000000..dca0010a0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_XSK_POOL_H__
+#define __MLX5_EN_XSK_POOL_H__
+
+#include "en.h"
+
+static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->pools)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->pools[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid);
+
+#endif /* __MLX5_EN_XSK_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000..c91b54d9f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
+
+/* RX data path */
+
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5_wq_cyc *wq = &icosq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int batch, i;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
+
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
+ goto err;
+
+ BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
+ batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
+ rq->mpwqe.pages_per_wqe);
+
+ /* If batch < pages_per_wqe, either:
+ * 1. Some (or all) descriptors were invalid.
+ * 2. dma_need_sync is true, and it fell back to allocating one frame.
+ * In either case, try to continue allocating frames one by one, until
+ * the first error, which will mean there are no more valid descriptors.
+ */
+ for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
+ wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!wi->alloc_units[batch].xsk))
+ goto err_reuse_batch;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
+
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
+ }
+ } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ }
+ } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
+ u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size * 2),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ };
+ }
+ } else {
+ __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
+ rq->xsk_pool->chunk_size);
+ __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ .bcount = frame_size,
+ };
+ umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ .bcount = pad_size,
+ };
+ }
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ wi->consumed_strides = 0;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
+
+ /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
+ offset = ix * rq->mpwqe.mtts_per_wqe;
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
+ offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
+ offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
+
+ icosq->pc += rq->mpwqe.umr_wqebbs;
+
+ icosq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_reuse_batch:
+ while (--batch >= 0)
+ xsk_buff_free(wi->alloc_units[batch].xsk);
+
+err:
+ rq->stats->buff_alloc_err++;
+ return -ENOMEM;
+}
+
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct xdp_buff **buffs;
+ u32 contig, alloc;
+ int i;
+
+ /* mlx5e_init_frags_partition creates a 1:1 mapping between
+ * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
+ * allocate XDP buffers straight into alloc_units.
+ */
+ BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
+ sizeof(rq->wqe.alloc_units[0].xsk));
+ buffs = (struct xdp_buff **)rq->wqe.alloc_units;
+ contig = mlx5_wq_cyc_get_size(wq) - ix;
+ if (wqe_bulk <= contig) {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
+ } else {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
+ if (likely(alloc == contig))
+ alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
+ }
+
+ for (i = 0; i < alloc; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return alloc;
+}
+
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!frag->au->xsk))
+ return i;
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return wqe_bulk;
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
+{
+ u32 totallen = xdp->data_end - xdp->data_meta;
+ u32 metalen = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi, totallen);
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_put_data(skb, xdp->data_meta, totallen);
+
+ if (metalen) {
+ skb_metadata_set(skb, metalen);
+ __skb_pull(skb, metalen);
+ }
+
+ return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx)
+{
+ struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk;
+ struct bpf_prog *prog;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ /* head_offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, in
+ * the current implementation, UMR pages are mapped to XSK frames, so
+ * head_offset should always be 0.
+ */
+ WARN_ON_ONCE(head_offset);
+
+ xsk_buff_set_size(xdp, cqe_bcnt);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
+
+ /* Possible flows:
+ * - XDP_REDIRECT to XSKMAP:
+ * The page is owned by the userspace from now.
+ * - XDP_TX and other XDP_REDIRECTs:
+ * The page was returned by ZCA and recycled.
+ * - XDP_DROP:
+ * Recycle the page.
+ * - XDP_PASS:
+ * Allocate an SKB, copy the data and recycle the page.
+ *
+ * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
+ * size is the same as the Driver RX Ring's size, and pages for WQEs are
+ * allocated first from the Reuse Ring, so it has enough space.
+ */
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
+ if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
+ * frame. On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, xdp);
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
+{
+ struct xdp_buff *xdp = wi->au->xsk;
+ struct bpf_prog *prog;
+
+ /* wi->offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, the
+ * XSK allocator allocates frames per packet, instead of pages, so
+ * wi->offset should always be 0.
+ */
+ WARN_ON_ONCE(wi->offset);
+
+ xsk_buff_set_size(xdp, cqe_bcnt);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
+
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
+ return NULL; /* page/packet was consumed by XDP */
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+ * will be handled by mlx5e_free_rx_wqe.
+ * On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, xdp);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
new file mode 100644
index 000000000..087c943bd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_RX_H__
+#define __MLX5_EN_XSK_RX_H__
+
+#include "en.h"
+
+/* RX data path */
+
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
+
+#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
new file mode 100644
index 000000000..ff03c4383
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "setup.h"
+#include "en/params.h"
+#include "en/txrx.h"
+#include "en/health.h"
+#include <net/xdp_sock_drv.h>
+
+/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal
+ * stride size of striding RQ.
+ */
+#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE)
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev)
+{
+ /* AF_XDP doesn't support frames larger than PAGE_SIZE. */
+ if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
+ return false;
+
+ /* frag_sz is different for regular and XSK RQs, so ensure that linear
+ * SKB mode is possible.
+ */
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk);
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ return mlx5e_rx_is_linear_skb(mdev, params, xsk);
+ }
+}
+
+static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ u16 q_counter,
+ struct mlx5e_channel_param *cparam)
+{
+ mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
+ mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq);
+}
+
+static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct xsk_buff_pool *pool,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ int rq_xdp_ix;
+ int err;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = c->netdev;
+ rq->priv = c->priv;
+ rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
+ rq->icosq = &c->icosq;
+ rq->ix = c->ix;
+ rq->channel = c;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->xsk_pool = pool;
+ rq->stats = &c->priv->channel_stats[c->ix]->xskrq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+ rq_xdp_ix = c->ix;
+ err = mlx5e_rq_set_handlers(rq, params, xsk);
+ if (err)
+ return err;
+
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
+}
+
+static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
+ struct mlx5e_xsk_param *xsk)
+{
+ int err;
+
+ err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq);
+ if (err)
+ return err;
+
+ return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq);
+}
+
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
+ struct mlx5e_channel *c)
+{
+ struct mlx5e_channel_param *cparam;
+ struct mlx5e_create_cq_param ccp;
+ int err;
+
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
+ return -EINVAL;
+
+ cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
+ if (!cparam)
+ return -ENOMEM;
+
+ mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam);
+
+ err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
+ &c->xskrq.cq);
+ if (unlikely(err))
+ goto err_free_cparam;
+
+ err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk);
+ if (unlikely(err))
+ goto err_close_rx_cq;
+
+ err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
+ &c->xsksq.cq);
+ if (unlikely(err))
+ goto err_close_rq;
+
+ /* Create a separate SQ, so that when the buff pool is disabled, we could
+ * close this SQ safely and stop receiving CQEs. In other case, e.g., if
+ * the XDPSQ was used instead, we might run into trouble when the buff pool
+ * is disabled and then re-enabled, but the SQ continues receiving CQEs
+ * from the old buff pool.
+ */
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true);
+ if (unlikely(err))
+ goto err_close_tx_cq;
+
+ kvfree(cparam);
+
+ set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ return 0;
+
+err_close_tx_cq:
+ mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->xskrq.cq);
+
+err_free_cparam:
+ kvfree(cparam);
+
+ return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+ clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ synchronize_net(); /* Sync with NAPI. */
+
+ mlx5e_close_rq(&c->xskrq);
+ mlx5e_close_cq(&c->xskrq.cq);
+ mlx5e_close_xdpsq(&c->xsksq);
+ mlx5e_close_cq(&c->xsksq.cq);
+
+ memset(&c->xskrq, 0, sizeof(c->xskrq));
+ memset(&c->xsksq, 0, sizeof(c->xsksq));
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
+ * activating XSKRQ in the middle of recovery.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+
+ /* TX queue is created active. */
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
+ * middle of recovery. Suspend the recovery to avoid it.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+
+ /* TX queue is disabled on close. */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
new file mode 100644
index 000000000..50e111b85
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_SETUP_H__
+#define __MLX5_EN_XSK_SETUP_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param;
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev);
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
+ struct mlx5e_channel *c);
+void mlx5e_close_xsk(struct mlx5e_channel *c);
+void mlx5e_activate_xsk(struct mlx5e_channel *c);
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
+
+#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
new file mode 100644
index 000000000..367a9505c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "tx.h"
+#include "pool.h"
+#include "en/xdp.h"
+#include "en/params.h"
+#include <net/xdp_sock_drv.h>
+
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_channel *c;
+
+ if (unlikely(!mlx5e_xdp_is_active(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(qid >= params->num_channels))
+ return -EINVAL;
+
+ c = priv->channels.c[qid];
+
+ if (!napi_if_scheduled_mark_missed(&c->napi)) {
+ /* To avoid WQE overrun, don't post a NOP if async_icosq is not
+ * active and not polled by NAPI. Return 0, because the upcoming
+ * activate will trigger the IRQ for us.
+ */
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->async_icosq.state)))
+ return 0;
+
+ if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
+ return 0;
+
+ mlx5e_trigger_napi_icosq(c);
+ }
+
+ return 0;
+}
+
+/* When TX fails (because of the size of the packet), we need to get completions
+ * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish
+ * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the
+ * same.
+ */
+static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+ struct mlx5e_tx_wqe *nopwqe;
+
+ wi->num_wqebbs = 1;
+ wi->num_pkts = 1;
+
+ nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ sq->doorbell_cseg = &nopwqe->ctrl;
+}
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
+{
+ struct xsk_buff_pool *pool = sq->xsk_pool;
+ struct mlx5e_xmit_data xdptxd;
+ struct mlx5e_xdp_info xdpi;
+ bool work_done = true;
+ bool flush = false;
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
+
+ for (; budget; budget--) {
+ int check_result = INDIRECT_CALL_2(sq->xmit_xdp_frame_check,
+ mlx5e_xmit_xdp_frame_check_mpwqe,
+ mlx5e_xmit_xdp_frame_check,
+ sq);
+ struct xdp_desc desc;
+ bool ret;
+
+ if (unlikely(check_result < 0)) {
+ work_done = false;
+ break;
+ }
+
+ if (!xsk_tx_peek_desc(pool, &desc)) {
+ /* TX will get stuck until something wakes it up by
+ * triggering NAPI. Currently it's expected that the
+ * application calls sendto() if there are consumed, but
+ * not completed frames.
+ */
+ break;
+ }
+
+ xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr);
+ xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr);
+ xdptxd.len = desc.len;
+
+ xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
+
+ ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+ check_result);
+ if (unlikely(!ret)) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xsk_tx_post_err(sq, &xdpi);
+ } else {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ }
+
+ flush = true;
+ }
+
+ if (flush) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+ mlx5e_xmit_xdp_doorbell(sq);
+
+ xsk_tx_release(pool);
+ }
+
+ return !(budget && work_done);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
new file mode 100644
index 000000000..9c505158b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_TX_H__
+#define __MLX5_EN_XSK_TX_H__
+
+#include "en.h"
+
+/* TX data path */
+
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
+
+#endif /* __MLX5_EN_XSK_TX_H__ */