Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ethernet/mellanox/mlx5/core/lib
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
40 files changed, 10290 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
new file mode 100644
index 0000000000..58bd749b5e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/transobj.h>
+#include "clock.h"
+#include "aso.h"
+#include "wq.h"
+
+struct mlx5_aso_cq {
+	/* data path - accessed per cqe */
+	struct mlx5_cqwq           wq;
+
+	/* data path - accessed per napi poll */
+	struct mlx5_core_cq        mcq;
+
+	/* control */
+	struct mlx5_core_dev      *mdev;
+	struct mlx5_wq_ctrl        wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5_aso {
+	/* data path */
+	u16                        cc;
+	u16                        pc;
+
+	struct mlx5_wqe_ctrl_seg  *doorbell_cseg;
+	struct mlx5_aso_cq         cq;
+
+	/* read only */
+	struct mlx5_wq_cyc         wq;
+	void __iomem              *uar_map;
+	u32                        sqn;
+
+	/* control path */
+	struct mlx5_wq_ctrl        wq_ctrl;
+
+} ____cacheline_aligned_in_smp;
+
+static void mlx5_aso_free_cq(struct mlx5_aso_cq *cq)
+{
+	mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_alloc_cq(struct mlx5_core_dev *mdev, int numa_node,
+			     void *cqc_data, struct mlx5_aso_cq *cq)
+{
+	struct mlx5_core_cq *mcq = &cq->mcq;
+	struct mlx5_wq_param param;
+	int err;
+	u32 i;
+
+	param.buf_numa_node = numa_node;
+	param.db_numa_node = numa_node;
+
+	err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
+	if (err)
+		return err;
+
+	mcq->cqe_sz     = 64;
+	mcq->set_ci_db  = cq->wq_ctrl.db.db;
+	mcq->arm_db     = cq->wq_ctrl.db.db + 1;
+
+	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+		cqe->op_own = 0xf1;
+	}
+
+	cq->mdev = mdev;
+
+	return 0;
+}
+
+static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data)
+{
+	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+	struct mlx5_core_dev *mdev = cq->mdev;
+	struct mlx5_core_cq *mcq = &cq->mcq;
+	void *in, *cqc;
+	int inlen, eqn;
+	int err;
+
+	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
+	if (err)
+		return err;
+
+	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+		sizeof(u64) * cq->wq_ctrl.buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+	memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
+
+	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+	MLX5_SET(cqc,   cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+	MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
+	MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
+	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+					    MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
+
+	err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
+
+	kvfree(in);
+
+	return err;
+}
+
+static void mlx5_aso_destroy_cq(struct mlx5_aso_cq *cq)
+{
+	mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+	mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node,
+			      struct mlx5_aso_cq *cq)
+{
+	void *cqc_data;
+	int err;
+
+	cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
+	if (!cqc_data)
+		return -ENOMEM;
+
+	MLX5_SET(cqc, cqc_data, log_cq_size, 1);
+	MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
+	if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+		MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD);
+
+	err = mlx5_aso_alloc_cq(mdev, numa_node, cqc_data, cq);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to alloc aso wq cq, err=%d\n", err);
+		goto err_out;
+	}
+
+	err = create_aso_cq(cq, cqc_data);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to create aso wq cq, err=%d\n", err);
+		goto err_free_cq;
+	}
+
+	kvfree(cqc_data);
+	return 0;
+
+err_free_cq:
+	mlx5_aso_free_cq(cq);
+err_out:
+	kvfree(cqc_data);
+	return err;
+}
+
+static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node,
+			     void *sqc_data, struct mlx5_aso *sq)
+{
+	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct mlx5_wq_param param;
+	int err;
+
+	sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+
+	param.db_numa_node = numa_node;
+	param.buf_numa_node = numa_node;
+	err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
+	if (err)
+		return err;
+	wq->db = &wq->db[MLX5_SND_DBR];
+
+	return 0;
+}
+
+static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn,
+			 void *sqc_data, struct mlx5_aso *sq)
+{
+	void *in, *sqc, *wq;
+	int inlen, err;
+	u8 ts_format;
+
+	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+		sizeof(u64) * sq->wq_ctrl.buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+	wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
+	MLX5_SET(sqc,  sqc, cqn, sq->cq.mcq.cqn);
+
+	MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
+	MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
+
+	ts_format = mlx5_is_real_time_sq(mdev) ?
+			MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+			MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+	MLX5_SET(sqc, sqc, ts_format, ts_format);
+
+	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
+	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.hw_objs.bfreg.index);
+	MLX5_SET(wq,   wq, log_wq_pg_sz,  sq->wq_ctrl.buf.page_shift -
+					  MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(wq, wq, dbr_addr,      sq->wq_ctrl.db.dma);
+
+	mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+
+	kvfree(in);
+
+	return err;
+}
+
+static int mlx5_aso_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
+{
+	void *in, *sqc;
+	int inlen, err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
+	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+
+	err = mlx5_core_modify_sq(mdev, sqn, in);
+
+	kvfree(in);
+
+	return err;
+}
+
+static int mlx5_aso_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
+				  void *sqc_data, struct mlx5_aso *sq)
+{
+	int err;
+
+	err = create_aso_sq(mdev, pdn, sqc_data, sq);
+	if (err)
+		return err;
+
+	err = mlx5_aso_set_sq_rdy(mdev, sq->sqn);
+	if (err)
+		mlx5_core_destroy_sq(mdev, sq->sqn);
+
+	return err;
+}
+
+static void mlx5_aso_free_sq(struct mlx5_aso *sq)
+{
+	mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5_aso_destroy_sq(struct mlx5_aso *sq)
+{
+	mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn);
+	mlx5_aso_free_sq(sq);
+}
+
+static int mlx5_aso_create_sq(struct mlx5_core_dev *mdev, int numa_node,
+			      u32 pdn, struct mlx5_aso *sq)
+{
+	void *sqc_data, *wq;
+	int err;
+
+	sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
+	if (!sqc_data)
+		return -ENOMEM;
+
+	wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+	MLX5_SET(wq, wq, pd, pdn);
+	MLX5_SET(wq, wq, log_wq_sz, 1);
+
+	err = mlx5_aso_alloc_sq(mdev, numa_node, sqc_data, sq);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to alloc aso wq sq, err=%d\n", err);
+		goto err_out;
+	}
+
+	err = mlx5_aso_create_sq_rdy(mdev, pdn, sqc_data, sq);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to open aso wq sq, err=%d\n", err);
+		goto err_free_asosq;
+	}
+
+	mlx5_core_dbg(mdev, "aso sq->sqn = 0x%x\n", sq->sqn);
+
+	kvfree(sqc_data);
+	return 0;
+
+err_free_asosq:
+	mlx5_aso_free_sq(sq);
+err_out:
+	kvfree(sqc_data);
+	return err;
+}
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn)
+{
+	int numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+	struct mlx5_aso *aso;
+	int err;
+
+	aso = kzalloc(sizeof(*aso), GFP_KERNEL);
+	if (!aso)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx5_aso_create_cq(mdev, numa_node, &aso->cq);
+	if (err)
+		goto err_cq;
+
+	err = mlx5_aso_create_sq(mdev, numa_node, pdn, aso);
+	if (err)
+		goto err_sq;
+
+	return aso;
+
+err_sq:
+	mlx5_aso_destroy_cq(&aso->cq);
+err_cq:
+	kfree(aso);
+	return ERR_PTR(err);
+}
+
+void mlx5_aso_destroy(struct mlx5_aso *aso)
+{
+	mlx5_aso_destroy_sq(aso);
+	mlx5_aso_destroy_cq(&aso->cq);
+	kfree(aso);
+}
+
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+			struct mlx5_aso_wqe *aso_wqe,
+			u32 obj_id, u32 opc_mode)
+{
+	struct mlx5_wqe_ctrl_seg *cseg = &aso_wqe->ctrl;
+
+	cseg->opmod_idx_opcode = cpu_to_be32((opc_mode << MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT) |
+					     (aso->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+					     MLX5_OPCODE_ACCESS_ASO);
+	cseg->qpn_ds     = cpu_to_be32((aso->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt);
+	cseg->fm_ce_se   = MLX5_WQE_CTRL_CQ_UPDATE;
+	cseg->general_id = cpu_to_be32(obj_id);
+}
+
+struct mlx5_aso_wqe *mlx5_aso_get_wqe(struct mlx5_aso *aso)
+{
+	struct mlx5_aso_wqe *wqe;
+	u16 pi;
+
+	pi = mlx5_wq_cyc_ctr2ix(&aso->wq, aso->pc);
+	wqe = mlx5_wq_cyc_get_wqe(&aso->wq, pi);
+	memset(wqe, 0, sizeof(*wqe));
+	return wqe;
+}
+
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+		       struct mlx5_wqe_ctrl_seg *doorbell_cseg)
+{
+	doorbell_cseg->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
+	/* ensure wqe is visible to device before updating doorbell record */
+	dma_wmb();
+
+	if (with_data)
+		aso->pc += MLX5_ASO_WQEBBS_DATA;
+	else
+		aso->pc += MLX5_ASO_WQEBBS;
+	*aso->wq.db = cpu_to_be32(aso->pc);
+
+	/* ensure doorbell record is visible to device before ringing the
+	 * doorbell
+	 */
+	wmb();
+
+	mlx5_write64((__be32 *)doorbell_cseg, aso->uar_map);
+
+	/* Ensure doorbell is written on uar_page before poll_cq */
+	WRITE_ONCE(doorbell_cseg, NULL);
+}
+
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data)
+{
+	struct mlx5_aso_cq *cq = &aso->cq;
+	struct mlx5_cqe64 *cqe;
+
+	cqe = mlx5_cqwq_get_cqe(&cq->wq);
+	if (!cqe)
+		return -ETIMEDOUT;
+
+	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+	 * otherwise a cq overrun may occur
+	 */
+	mlx5_cqwq_pop(&cq->wq);
+
+	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+		struct mlx5_err_cqe *err_cqe;
+
+		mlx5_core_err(cq->mdev, "Bad OP in ASOSQ CQE: 0x%x\n",
+			      get_cqe_opcode(cqe));
+
+		err_cqe = (struct mlx5_err_cqe *)cqe;
+		mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n",
+			      err_cqe->vendor_err_synd);
+		mlx5_core_err(cq->mdev, "syndrome=%x\n",
+			      err_cqe->syndrome);
+		print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
+			       16, 1, err_cqe,
+			       sizeof(*err_cqe), false);
+	}
+
+	mlx5_cqwq_update_db_record(&cq->wq);
+
+	/* ensure cq space is freed before enabling more cqes */
+	wmb();
+
+	if (with_data)
+		aso->cc += MLX5_ASO_WQEBBS_DATA;
+	else
+		aso->cc += MLX5_ASO_WQEBBS;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
new file mode 100644
index 0000000000..afb078bbb8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LIB_ASO_H__
+#define __MLX5_LIB_ASO_H__
+
+#include <linux/mlx5/qp.h>
+#include "mlx5_core.h"
+
+#define MLX5_ASO_WQEBBS \
+	(DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_BB))
+#define MLX5_ASO_WQEBBS_DATA \
+	(DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_BB))
+#define ASO_CTRL_READ_EN BIT(0)
+#define MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT 24
+#define MLX5_MACSEC_ASO_DS_CNT (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_DS))
+
+#define ASO_CTRL_READ_EN BIT(0)
+struct mlx5_wqe_aso_ctrl_seg {
+	__be32  va_h;
+	__be32  va_l; /* include read_enable */
+	__be32  l_key;
+	u8      data_mask_mode;
+	u8      condition_1_0_operand;
+	u8      condition_1_0_offset;
+	u8      data_offset_condition_operand;
+	__be32  condition_0_data;
+	__be32  condition_0_mask;
+	__be32  condition_1_data;
+	__be32  condition_1_mask;
+	__be64  bitwise_data;
+	__be64  data_mask;
+};
+
+struct mlx5_wqe_aso_data_seg {
+	__be32  bytewise_data[16];
+};
+
+struct mlx5_aso_wqe {
+	struct mlx5_wqe_ctrl_seg      ctrl;
+	struct mlx5_wqe_aso_ctrl_seg  aso_ctrl;
+};
+
+struct mlx5_aso_wqe_data {
+	struct mlx5_wqe_ctrl_seg      ctrl;
+	struct mlx5_wqe_aso_ctrl_seg  aso_ctrl;
+	struct mlx5_wqe_aso_data_seg  aso_data;
+};
+
+enum {
+	MLX5_ASO_LOGICAL_AND,
+	MLX5_ASO_LOGICAL_OR,
+};
+
+enum {
+	MLX5_ASO_ALWAYS_FALSE,
+	MLX5_ASO_ALWAYS_TRUE,
+	MLX5_ASO_EQUAL,
+	MLX5_ASO_NOT_EQUAL,
+	MLX5_ASO_GREATER_OR_EQUAL,
+	MLX5_ASO_LESSER_OR_EQUAL,
+	MLX5_ASO_LESSER,
+	MLX5_ASO_GREATER,
+	MLX5_ASO_CYCLIC_GREATER,
+	MLX5_ASO_CYCLIC_LESSER,
+};
+
+enum {
+	MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT,
+	MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE,
+	MLX5_ASO_DATA_MASK_MODE_CALCULATED_64BYTE,
+};
+
+enum {
+	MLX5_ACCESS_ASO_OPC_MOD_IPSEC = 0x0,
+	MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER = 0x2,
+	MLX5_ACCESS_ASO_OPC_MOD_MACSEC = 0x5,
+};
+
+struct mlx5_aso;
+
+struct mlx5_aso_wqe *mlx5_aso_get_wqe(struct mlx5_aso *aso);
+void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt,
+			struct mlx5_aso_wqe *aso_wqe,
+			u32 obj_id, u32 opc_mode);
+void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data,
+		       struct mlx5_wqe_ctrl_seg *doorbell_cseg);
+int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data);
+
+struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn);
+void mlx5_aso_destroy(struct mlx5_aso *aso);
+#endif /* __MLX5_LIB_ASO_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
new file mode 100644
index 0000000000..0c83ef1742
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -0,0 +1,1088 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/ptp_clock_kernel.h>
+#include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
+#include "en.h"
+#include "clock.h"
+
+enum {
+	MLX5_PIN_MODE_IN		= 0x0,
+	MLX5_PIN_MODE_OUT		= 0x1,
+};
+
+enum {
+	MLX5_OUT_PATTERN_PULSE		= 0x0,
+	MLX5_OUT_PATTERN_PERIODIC	= 0x1,
+};
+
+enum {
+	MLX5_EVENT_MODE_DISABLE	= 0x0,
+	MLX5_EVENT_MODE_REPETETIVE	= 0x1,
+	MLX5_EVENT_MODE_ONCE_TILL_ARM	= 0x2,
+};
+
+enum {
+	MLX5_MTPPS_FS_ENABLE			= BIT(0x0),
+	MLX5_MTPPS_FS_PATTERN			= BIT(0x2),
+	MLX5_MTPPS_FS_PIN_MODE			= BIT(0x3),
+	MLX5_MTPPS_FS_TIME_STAMP		= BIT(0x4),
+	MLX5_MTPPS_FS_OUT_PULSE_DURATION	= BIT(0x5),
+	MLX5_MTPPS_FS_ENH_OUT_PER_ADJ		= BIT(0x7),
+	MLX5_MTPPS_FS_NPPS_PERIOD               = BIT(0x9),
+	MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS     = BIT(0xa),
+};
+
+enum {
+	MLX5_MTUTC_OPERATION_ADJUST_TIME_MIN          = S16_MIN,
+	MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX          = S16_MAX,
+	MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MIN = -200000,
+	MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000,
+};
+
+static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
+{
+	return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev));
+}
+
+static bool mlx5_npps_real_time_supported(struct mlx5_core_dev *mdev)
+{
+	return (mlx5_real_time_mode(mdev) &&
+		MLX5_CAP_MCAM_FEATURE(mdev, npps_period) &&
+		MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns));
+}
+
+static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
+}
+
+static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
+{
+	/* Optimal shift constant leads to corrections above just 1 scaled ppm.
+	 *
+	 * Two sets of equations are needed to derive the optimal shift
+	 * constant for the cyclecounter.
+	 *
+	 *    dev_freq_khz * 1000 / 2^shift_constant = 1 scaled_ppm
+	 *    ppb = scaled_ppm * 1000 / 2^16
+	 *
+	 * Using the two equations together
+	 *
+	 *    dev_freq_khz * 1000 / 1 scaled_ppm = 2^shift_constant
+	 *    dev_freq_khz * 2^16 / 1 ppb = 2^shift_constant
+	 *    dev_freq_khz = 2^(shift_constant - 16)
+	 *
+	 * then yields
+	 *
+	 *    shift_constant = ilog2(dev_freq_khz) + 16
+	 */
+
+	return min(ilog2(dev_freq_khz) + 16,
+		   ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz));
+}
+
+static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_core_dev *mdev;
+
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+	return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ?
+		       MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX :
+			     MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX;
+}
+
+static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta)
+{
+	s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info);
+
+	if (delta < -max || delta > max)
+		return false;
+
+	return true;
+}
+
+static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size)
+{
+	u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+	if (!MLX5_CAP_MCAM_REG(dev, mtutc))
+		return -EOPNOTSUPP;
+
+	return mlx5_core_access_reg(dev, mtutc, size, out, sizeof(out),
+				    MLX5_REG_MTUTC, 0, 1);
+}
+
+static u64 mlx5_read_time(struct mlx5_core_dev *dev,
+			  struct ptp_system_timestamp *sts,
+			  bool real_time)
+{
+	u32 timer_h, timer_h1, timer_l;
+
+	timer_h = ioread32be(real_time ? &dev->iseg->real_time_h :
+			     &dev->iseg->internal_timer_h);
+	ptp_read_system_prets(sts);
+	timer_l = ioread32be(real_time ? &dev->iseg->real_time_l :
+			     &dev->iseg->internal_timer_l);
+	ptp_read_system_postts(sts);
+	timer_h1 = ioread32be(real_time ? &dev->iseg->real_time_h :
+			      &dev->iseg->internal_timer_h);
+	if (timer_h != timer_h1) {
+		/* wrap around */
+		ptp_read_system_prets(sts);
+		timer_l = ioread32be(real_time ? &dev->iseg->real_time_l :
+				     &dev->iseg->internal_timer_l);
+		ptp_read_system_postts(sts);
+	}
+
+	return real_time ? REAL_TIME_TO_NS(timer_h1, timer_l) :
+			   (u64)timer_l | (u64)timer_h1 << 32;
+}
+
+static u64 read_internal_timer(const struct cyclecounter *cc)
+{
+	struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles);
+	struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer);
+	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+						  clock);
+
+	return mlx5_read_time(mdev, NULL, false) & cc->mask;
+}
+
+static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+	struct mlx5_clock *clock = &mdev->clock;
+	struct mlx5_timer *timer;
+	u32 sign;
+
+	if (!clock_info)
+		return;
+
+	sign = smp_load_acquire(&clock_info->sign);
+	smp_store_mb(clock_info->sign,
+		     sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
+
+	timer = &clock->timer;
+	clock_info->cycles = timer->tc.cycle_last;
+	clock_info->mult   = timer->cycles.mult;
+	clock_info->nsec   = timer->tc.nsec;
+	clock_info->frac   = timer->tc.frac;
+
+	smp_store_release(&clock_info->sign,
+			  sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
+}
+
+static void mlx5_pps_out(struct work_struct *work)
+{
+	struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
+						 out_work);
+	struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
+						pps_info);
+	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+						  clock);
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < clock->ptp_info.n_pins; i++) {
+		u64 tstart;
+
+		write_seqlock_irqsave(&clock->lock, flags);
+		tstart = clock->pps_info.start[i];
+		clock->pps_info.start[i] = 0;
+		write_sequnlock_irqrestore(&clock->lock, flags);
+		if (!tstart)
+			continue;
+
+		MLX5_SET(mtpps_reg, in, pin, i);
+		MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
+		MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP);
+		mlx5_set_mtpps(mdev, in, sizeof(in));
+	}
+}
+
+static void mlx5_timestamp_overflow(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct mlx5_core_dev *mdev;
+	struct mlx5_timer *timer;
+	struct mlx5_clock *clock;
+	unsigned long flags;
+
+	timer = container_of(dwork, struct mlx5_timer, overflow_work);
+	clock = container_of(timer, struct mlx5_clock, timer);
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		goto out;
+
+	write_seqlock_irqsave(&clock->lock, flags);
+	timecounter_read(&timer->tc);
+	mlx5_update_clock_info_page(mdev);
+	write_sequnlock_irqrestore(&clock->lock, flags);
+
+out:
+	schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
+}
+
+static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
+				      const struct timespec64 *ts)
+{
+	u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+	if (!mlx5_modify_mtutc_allowed(mdev))
+		return 0;
+
+	if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX ||
+	    ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC)
+		return -EINVAL;
+
+	MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE);
+	MLX5_SET(mtutc_reg, in, utc_sec, ts->tv_sec);
+	MLX5_SET(mtutc_reg, in, utc_nsec, ts->tv_nsec);
+
+	return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_timer *timer = &clock->timer;
+	struct mlx5_core_dev *mdev;
+	unsigned long flags;
+	int err;
+
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+	err = mlx5_ptp_settime_real_time(mdev, ts);
+	if (err)
+		return err;
+
+	write_seqlock_irqsave(&clock->lock, flags);
+	timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts));
+	mlx5_update_clock_info_page(mdev);
+	write_sequnlock_irqrestore(&clock->lock, flags);
+
+	return 0;
+}
+
+static
+struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev,
+					      struct ptp_system_timestamp *sts)
+{
+	struct timespec64 ts;
+	u64 time;
+
+	time = mlx5_read_time(mdev, sts, true);
+	ts = ns_to_timespec64(time);
+	return ts;
+}
+
+static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+			     struct ptp_system_timestamp *sts)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_timer *timer = &clock->timer;
+	struct mlx5_core_dev *mdev;
+	unsigned long flags;
+	u64 cycles, ns;
+
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+	if (mlx5_real_time_mode(mdev)) {
+		*ts = mlx5_ptp_gettimex_real_time(mdev, sts);
+		goto out;
+	}
+
+	write_seqlock_irqsave(&clock->lock, flags);
+	cycles = mlx5_read_time(mdev, sts, false);
+	ns = timecounter_cyc2time(&timer->tc, cycles);
+	write_sequnlock_irqrestore(&clock->lock, flags);
+	*ts = ns_to_timespec64(ns);
+out:
+	return 0;
+}
+
+static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta)
+{
+	u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+	if (!mlx5_modify_mtutc_allowed(mdev))
+		return 0;
+
+	/* HW time adjustment range is checked. If out of range, settime instead */
+	if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) {
+		struct timespec64 ts;
+		s64 ns;
+
+		ts = mlx5_ptp_gettimex_real_time(mdev, NULL);
+		ns = timespec64_to_ns(&ts) + delta;
+		ts = ns_to_timespec64(ns);
+		return mlx5_ptp_settime_real_time(mdev, &ts);
+	}
+
+	MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_TIME);
+	MLX5_SET(mtutc_reg, in, time_adjustment, delta);
+
+	return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
+static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_timer *timer = &clock->timer;
+	struct mlx5_core_dev *mdev;
+	unsigned long flags;
+	int err;
+
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+	err = mlx5_ptp_adjtime_real_time(mdev, delta);
+	if (err)
+		return err;
+	write_seqlock_irqsave(&clock->lock, flags);
+	timecounter_adjtime(&timer->tc, delta);
+	mlx5_update_clock_info_page(mdev);
+	write_sequnlock_irqrestore(&clock->lock, flags);
+
+	return 0;
+}
+
+static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_core_dev *mdev;
+
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+	return mlx5_ptp_adjtime_real_time(mdev, delta);
+}
+
+static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
+{
+	u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+
+	if (!mlx5_modify_mtutc_allowed(mdev))
+		return 0;
+
+	MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC);
+
+	if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units)) {
+		MLX5_SET(mtutc_reg, in, freq_adj_units,
+			 MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM);
+		MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm);
+	} else {
+		MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_PPB);
+		MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm_to_ppb(scaled_ppm));
+	}
+
+	return mlx5_set_mtutc(mdev, in, sizeof(in));
+}
+
+static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_timer *timer = &clock->timer;
+	struct mlx5_core_dev *mdev;
+	unsigned long flags;
+	u32 mult;
+	int err;
+
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+	err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
+	if (err)
+		return err;
+
+	mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm);
+
+	write_seqlock_irqsave(&clock->lock, flags);
+	timecounter_read(&timer->tc);
+	timer->cycles.mult = mult;
+	mlx5_update_clock_info_page(mdev);
+	write_sequnlock_irqrestore(&clock->lock, flags);
+
+	return 0;
+}
+
+static int mlx5_extts_configure(struct ptp_clock_info *ptp,
+				struct ptp_clock_request *rq,
+				int on)
+{
+	struct mlx5_clock *clock =
+			container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_core_dev *mdev =
+			container_of(clock, struct mlx5_core_dev, clock);
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	u32 field_select = 0;
+	u8 pin_mode = 0;
+	u8 pattern = 0;
+	int pin = -1;
+	int err = 0;
+
+	if (!MLX5_PPS_CAP(mdev))
+		return -EOPNOTSUPP;
+
+	/* Reject requests with unsupported flags */
+	if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+				PTP_RISING_EDGE |
+				PTP_FALLING_EDGE |
+				PTP_STRICT_FLAGS))
+		return -EOPNOTSUPP;
+
+	/* Reject requests to enable time stamping on both edges. */
+	if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+	    (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+	    (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+		return -EOPNOTSUPP;
+
+	if (rq->extts.index >= clock->ptp_info.n_pins)
+		return -EINVAL;
+
+	pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+	if (pin < 0)
+		return -EBUSY;
+
+	if (on) {
+		pin_mode = MLX5_PIN_MODE_IN;
+		pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
+		field_select = MLX5_MTPPS_FS_PIN_MODE |
+			       MLX5_MTPPS_FS_PATTERN |
+			       MLX5_MTPPS_FS_ENABLE;
+	} else {
+		field_select = MLX5_MTPPS_FS_ENABLE;
+	}
+
+	MLX5_SET(mtpps_reg, in, pin, pin);
+	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+	MLX5_SET(mtpps_reg, in, pattern, pattern);
+	MLX5_SET(mtpps_reg, in, enable, on);
+	MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+	err = mlx5_set_mtpps(mdev, in, sizeof(in));
+	if (err)
+		return err;
+
+	return mlx5_set_mtppse(mdev, pin, 0,
+			       MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+	u64 cycles_now, cycles_delta;
+	u64 nsec_now, nsec_delta;
+	struct mlx5_timer *timer;
+	unsigned long flags;
+
+	timer = &clock->timer;
+
+	cycles_now = mlx5_read_time(mdev, NULL, false);
+	write_seqlock_irqsave(&clock->lock, flags);
+	nsec_now = timecounter_cyc2time(&timer->tc, cycles_now);
+	nsec_delta = target_ns - nsec_now;
+	cycles_delta = div64_u64(nsec_delta << timer->cycles.shift,
+				 timer->cycles.mult);
+	write_sequnlock_irqrestore(&clock->lock, flags);
+
+	return cycles_now + cycles_delta;
+}
+
+static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec)
+{
+	struct timespec64 ts = {};
+	s64 target_ns;
+
+	ts.tv_sec = sec;
+	target_ns = timespec64_to_ns(&ts);
+
+	return find_target_cycles(mdev, target_ns);
+}
+
+static u64 perout_conf_real_time(s64 sec, u32 nsec)
+{
+	return (u64)nsec | (u64)sec << 32;
+}
+
+static int perout_conf_1pps(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+			    u64 *time_stamp, bool real_time)
+{
+	struct timespec64 ts;
+	s64 ns;
+
+	ts.tv_nsec = rq->perout.period.nsec;
+	ts.tv_sec = rq->perout.period.sec;
+	ns = timespec64_to_ns(&ts);
+
+	if ((ns >> 1) != 500000000LL)
+		return -EINVAL;
+
+	*time_stamp = real_time ? perout_conf_real_time(rq->perout.start.sec, 0) :
+		      perout_conf_internal_timer(mdev, rq->perout.start.sec);
+
+	return 0;
+}
+
+#define MLX5_MAX_PULSE_DURATION (BIT(__mlx5_bit_sz(mtpps_reg, out_pulse_duration_ns)) - 1)
+static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev,
+					       struct ptp_clock_request *rq,
+					       u32 *out_pulse_duration_ns)
+{
+	struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+	u32 out_pulse_duration;
+	struct timespec64 ts;
+
+	if (rq->perout.flags & PTP_PEROUT_DUTY_CYCLE) {
+		ts.tv_sec = rq->perout.on.sec;
+		ts.tv_nsec = rq->perout.on.nsec;
+		out_pulse_duration = (u32)timespec64_to_ns(&ts);
+	} else {
+		/* out_pulse_duration_ns should be up to 50% of the
+		 * pulse period as default
+		 */
+		ts.tv_sec = rq->perout.period.sec;
+		ts.tv_nsec = rq->perout.period.nsec;
+		out_pulse_duration = (u32)timespec64_to_ns(&ts) >> 1;
+	}
+
+	if (out_pulse_duration < pps_info->min_out_pulse_duration_ns ||
+	    out_pulse_duration > MLX5_MAX_PULSE_DURATION) {
+		mlx5_core_err(mdev, "NPPS pulse duration %u is not in [%llu, %lu]\n",
+			      out_pulse_duration, pps_info->min_out_pulse_duration_ns,
+			      MLX5_MAX_PULSE_DURATION);
+		return -EINVAL;
+	}
+	*out_pulse_duration_ns = out_pulse_duration;
+
+	return 0;
+}
+
+static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq,
+				      u32 *field_select, u32 *out_pulse_duration_ns,
+				      u64 *period, u64 *time_stamp)
+{
+	struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+	struct ptp_clock_time *time = &rq->perout.start;
+	struct timespec64 ts;
+
+	ts.tv_sec = rq->perout.period.sec;
+	ts.tv_nsec = rq->perout.period.nsec;
+	if (timespec64_to_ns(&ts) < pps_info->min_npps_period) {
+		mlx5_core_err(mdev, "NPPS period is lower than minimal npps period %llu\n",
+			      pps_info->min_npps_period);
+		return -EINVAL;
+	}
+	*period = perout_conf_real_time(rq->perout.period.sec, rq->perout.period.nsec);
+
+	if (mlx5_perout_conf_out_pulse_duration(mdev, rq, out_pulse_duration_ns))
+		return -EINVAL;
+
+	*time_stamp = perout_conf_real_time(time->sec, time->nsec);
+	*field_select |= MLX5_MTPPS_FS_NPPS_PERIOD |
+			 MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS;
+
+	return 0;
+}
+
+static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags)
+{
+	return ((!mlx5_npps_real_time_supported(mdev) && flags) ||
+		(mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE));
+}
+
+static int mlx5_perout_configure(struct ptp_clock_info *ptp,
+				 struct ptp_clock_request *rq,
+				 int on)
+{
+	struct mlx5_clock *clock =
+			container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_core_dev *mdev =
+			container_of(clock, struct mlx5_core_dev, clock);
+	bool rt_mode = mlx5_real_time_mode(mdev);
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	u32 out_pulse_duration_ns = 0;
+	u32 field_select = 0;
+	u64 npps_period = 0;
+	u64 time_stamp = 0;
+	u8 pin_mode = 0;
+	u8 pattern = 0;
+	int pin = -1;
+	int err = 0;
+
+	if (!MLX5_PPS_CAP(mdev))
+		return -EOPNOTSUPP;
+
+	/* Reject requests with unsupported flags */
+	if (mlx5_perout_verify_flags(mdev, rq->perout.flags))
+		return -EOPNOTSUPP;
+
+	if (rq->perout.index >= clock->ptp_info.n_pins)
+		return -EINVAL;
+
+	field_select = MLX5_MTPPS_FS_ENABLE;
+	pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index);
+	if (pin < 0)
+		return -EBUSY;
+
+	if (on) {
+		bool rt_mode = mlx5_real_time_mode(mdev);
+
+		pin_mode = MLX5_PIN_MODE_OUT;
+		pattern = MLX5_OUT_PATTERN_PERIODIC;
+
+		if (rt_mode &&  rq->perout.start.sec > U32_MAX)
+			return -EINVAL;
+
+		field_select |= MLX5_MTPPS_FS_PIN_MODE |
+				MLX5_MTPPS_FS_PATTERN |
+				MLX5_MTPPS_FS_TIME_STAMP;
+
+		if (mlx5_npps_real_time_supported(mdev))
+			err = perout_conf_npps_real_time(mdev, rq, &field_select,
+							 &out_pulse_duration_ns, &npps_period,
+							 &time_stamp);
+		else
+			err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode);
+		if (err)
+			return err;
+	}
+
+	MLX5_SET(mtpps_reg, in, pin, pin);
+	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+	MLX5_SET(mtpps_reg, in, pattern, pattern);
+	MLX5_SET(mtpps_reg, in, enable, on);
+	MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+	MLX5_SET(mtpps_reg, in, field_select, field_select);
+	MLX5_SET64(mtpps_reg, in, npps_period, npps_period);
+	MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns);
+	err = mlx5_set_mtpps(mdev, in, sizeof(in));
+	if (err)
+		return err;
+
+	if (rt_mode)
+		return 0;
+
+	return mlx5_set_mtppse(mdev, pin, 0,
+			       MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_pps_configure(struct ptp_clock_info *ptp,
+			      struct ptp_clock_request *rq,
+			      int on)
+{
+	struct mlx5_clock *clock =
+			container_of(ptp, struct mlx5_clock, ptp_info);
+
+	clock->pps_info.enabled = !!on;
+	return 0;
+}
+
+static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
+			   struct ptp_clock_request *rq,
+			   int on)
+{
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		return mlx5_extts_configure(ptp, rq, on);
+	case PTP_CLK_REQ_PEROUT:
+		return mlx5_perout_configure(ptp, rq, on);
+	case PTP_CLK_REQ_PPS:
+		return mlx5_pps_configure(ptp, rq, on);
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+enum {
+	MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0),
+	MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1),
+};
+
+static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+			   enum ptp_pin_function func, unsigned int chan)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+						ptp_info);
+
+	switch (func) {
+	case PTP_PF_NONE:
+		return 0;
+	case PTP_PF_EXTTS:
+		return !(clock->pps_info.pin_caps[pin] &
+			 MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN);
+	case PTP_PF_PEROUT:
+		return !(clock->pps_info.pin_caps[pin] &
+			 MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct ptp_clock_info mlx5_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= "mlx5_ptp",
+	.max_adj	= 50000000,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.adjfine	= mlx5_ptp_adjfine,
+	.adjphase	= mlx5_ptp_adjphase,
+	.getmaxphase    = mlx5_ptp_getmaxphase,
+	.adjtime	= mlx5_ptp_adjtime,
+	.gettimex64	= mlx5_ptp_gettimex,
+	.settime64	= mlx5_ptp_settime,
+	.enable		= NULL,
+	.verify		= NULL,
+};
+
+static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
+				     u32 *mtpps, u32 mtpps_size)
+{
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+
+	MLX5_SET(mtpps_reg, in, pin, pin);
+
+	return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps,
+				    mtpps_size, MLX5_REG_MTPPS, 0, 0);
+}
+
+static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
+{
+	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+	u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+	u8 mode;
+	int err;
+
+	err = mlx5_query_mtpps_pin_mode(mdev, pin, out, sizeof(out));
+	if (err || !MLX5_GET(mtpps_reg, out, enable))
+		return PTP_PF_NONE;
+
+	mode = MLX5_GET(mtpps_reg, out, pin_mode);
+
+	if (mode == MLX5_PIN_MODE_IN)
+		return PTP_PF_EXTTS;
+	else if (mode == MLX5_PIN_MODE_OUT)
+		return PTP_PF_PEROUT;
+
+	return PTP_PF_NONE;
+}
+
+static void mlx5_init_pin_config(struct mlx5_clock *clock)
+{
+	int i;
+
+	if (!clock->ptp_info.n_pins)
+		return;
+
+	clock->ptp_info.pin_config =
+			kcalloc(clock->ptp_info.n_pins,
+				sizeof(*clock->ptp_info.pin_config),
+				GFP_KERNEL);
+	if (!clock->ptp_info.pin_config)
+		return;
+	clock->ptp_info.enable = mlx5_ptp_enable;
+	clock->ptp_info.verify = mlx5_ptp_verify;
+	clock->ptp_info.pps = 1;
+
+	for (i = 0; i < clock->ptp_info.n_pins; i++) {
+		snprintf(clock->ptp_info.pin_config[i].name,
+			 sizeof(clock->ptp_info.pin_config[i].name),
+			 "mlx5_pps%d", i);
+		clock->ptp_info.pin_config[i].index = i;
+		clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
+		clock->ptp_info.pin_config[i].chan = 0;
+	}
+}
+
+static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+	u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+	mlx5_query_mtpps(mdev, out, sizeof(out));
+
+	clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
+					  cap_number_of_pps_pins);
+	clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
+					    cap_max_num_of_pps_in_pins);
+	clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+					     cap_max_num_of_pps_out_pins);
+
+	if (MLX5_CAP_MCAM_FEATURE(mdev, npps_period))
+		clock->pps_info.min_npps_period = 1 << MLX5_GET(mtpps_reg, out,
+								cap_log_min_npps_period);
+	if (MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns))
+		clock->pps_info.min_out_pulse_duration_ns = 1 << MLX5_GET(mtpps_reg, out,
+								cap_log_min_out_pulse_duration_ns);
+
+	clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+	clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+	clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+	clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+	clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+	clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+	clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+	clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+}
+
+static void ts_next_sec(struct timespec64 *ts)
+{
+	ts->tv_sec += 1;
+	ts->tv_nsec = 0;
+}
+
+static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev,
+					struct mlx5_clock *clock)
+{
+	struct timespec64 ts;
+	s64 target_ns;
+
+	mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
+	ts_next_sec(&ts);
+	target_ns = timespec64_to_ns(&ts);
+
+	return find_target_cycles(mdev, target_ns);
+}
+
+static int mlx5_pps_event(struct notifier_block *nb,
+			  unsigned long type, void *data)
+{
+	struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+	struct ptp_clock_event ptp_event;
+	struct mlx5_eqe *eqe = data;
+	int pin = eqe->data.pps.pin;
+	struct mlx5_core_dev *mdev;
+	unsigned long flags;
+	u64 ns;
+
+	mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+	switch (clock->ptp_info.pin_config[pin].func) {
+	case PTP_PF_EXTTS:
+		ptp_event.index = pin;
+		ptp_event.timestamp = mlx5_real_time_mode(mdev) ?
+			mlx5_real_time_cyc2time(clock,
+						be64_to_cpu(eqe->data.pps.time_stamp)) :
+			mlx5_timecounter_cyc2time(clock,
+						  be64_to_cpu(eqe->data.pps.time_stamp));
+		if (clock->pps_info.enabled) {
+			ptp_event.type = PTP_CLOCK_PPSUSR;
+			ptp_event.pps_times.ts_real =
+					ns_to_timespec64(ptp_event.timestamp);
+		} else {
+			ptp_event.type = PTP_CLOCK_EXTTS;
+		}
+		/* TODOL clock->ptp can be NULL if ptp_clock_register fails */
+		ptp_clock_event(clock->ptp, &ptp_event);
+		break;
+	case PTP_PF_PEROUT:
+		ns = perout_conf_next_event_timer(mdev, clock);
+		write_seqlock_irqsave(&clock->lock, flags);
+		clock->pps_info.start[pin] = ns;
+		write_sequnlock_irqrestore(&clock->lock, flags);
+		schedule_work(&clock->pps_info.out_work);
+		break;
+	default:
+		mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+			      clock->ptp_info.pin_config[pin].func);
+	}
+
+	return NOTIFY_OK;
+}
+
+static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+	struct mlx5_timer *timer = &clock->timer;
+	u32 dev_freq;
+
+	dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
+	timer->cycles.read = read_internal_timer;
+	timer->cycles.shift = mlx5_ptp_shift_constant(dev_freq);
+	timer->cycles.mult = clocksource_khz2mult(dev_freq,
+						  timer->cycles.shift);
+	timer->nominal_c_mult = timer->cycles.mult;
+	timer->cycles.mask = CLOCKSOURCE_MASK(41);
+
+	timecounter_init(&timer->tc, &timer->cycles,
+			 ktime_to_ns(ktime_get_real()));
+}
+
+static void mlx5_init_overflow_period(struct mlx5_clock *clock)
+{
+	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
+	struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+	struct mlx5_timer *timer = &clock->timer;
+	u64 overflow_cycles;
+	u64 frac = 0;
+	u64 ns;
+
+	/* Calculate period in seconds to call the overflow watchdog - to make
+	 * sure counter is checked at least twice every wrap around.
+	 * The period is calculated as the minimum between max HW cycles count
+	 * (The clock source mask) and max amount of cycles that can be
+	 * multiplied by clock multiplier where the result doesn't exceed
+	 * 64bits.
+	 */
+	overflow_cycles = div64_u64(~0ULL >> 1, timer->cycles.mult);
+	overflow_cycles = min(overflow_cycles, div_u64(timer->cycles.mask, 3));
+
+	ns = cyclecounter_cyc2ns(&timer->cycles, overflow_cycles,
+				 frac, &frac);
+	do_div(ns, NSEC_PER_SEC / HZ);
+	timer->overflow_period = ns;
+
+	INIT_DELAYED_WORK(&timer->overflow_work, mlx5_timestamp_overflow);
+	if (timer->overflow_period)
+		schedule_delayed_work(&timer->overflow_work, 0);
+	else
+		mlx5_core_warn(mdev,
+			       "invalid overflow period, overflow_work is not scheduled\n");
+
+	if (clock_info)
+		clock_info->overflow_period = timer->overflow_period;
+}
+
+static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+	struct mlx5_ib_clock_info *info;
+	struct mlx5_timer *timer;
+
+	mdev->clock_info = (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL);
+	if (!mdev->clock_info) {
+		mlx5_core_warn(mdev, "Failed to allocate IB clock info page\n");
+		return;
+	}
+
+	info = mdev->clock_info;
+	timer = &clock->timer;
+
+	info->nsec = timer->tc.nsec;
+	info->cycles = timer->tc.cycle_last;
+	info->mask = timer->cycles.mask;
+	info->mult = timer->nominal_c_mult;
+	info->shift = timer->cycles.shift;
+	info->frac = timer->tc.frac;
+}
+
+static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+
+	mlx5_timecounter_init(mdev);
+	mlx5_init_clock_info(mdev);
+	mlx5_init_overflow_period(clock);
+	clock->ptp_info = mlx5_ptp_clock_info;
+
+	if (mlx5_real_time_mode(mdev)) {
+		struct timespec64 ts;
+
+		ktime_get_real_ts64(&ts);
+		mlx5_ptp_settime(&clock->ptp_info, &ts);
+	}
+}
+
+static void mlx5_init_pps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+
+	if (!MLX5_PPS_CAP(mdev))
+		return;
+
+	mlx5_get_pps_caps(mdev);
+	mlx5_init_pin_config(clock);
+}
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+
+	if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
+		mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+		return;
+	}
+
+	seqlock_init(&clock->lock);
+	mlx5_init_timer_clock(mdev);
+	INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
+
+	/* Configure the PHC */
+	clock->ptp_info = mlx5_ptp_clock_info;
+
+	/* Initialize 1PPS data structures */
+	mlx5_init_pps(mdev);
+
+	clock->ptp = ptp_clock_register(&clock->ptp_info,
+					&mdev->pdev->dev);
+	if (IS_ERR(clock->ptp)) {
+		mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+			       PTR_ERR(clock->ptp));
+		clock->ptp = NULL;
+	}
+
+	MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+	mlx5_eq_notifier_register(mdev, &clock->pps_nb);
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+
+	if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+		return;
+
+	mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
+	if (clock->ptp) {
+		ptp_clock_unregister(clock->ptp);
+		clock->ptp = NULL;
+	}
+
+	cancel_work_sync(&clock->pps_info.out_work);
+	cancel_delayed_work_sync(&clock->timer.overflow_work);
+
+	if (mdev->clock_info) {
+		free_page((unsigned long)mdev->clock_info);
+		mdev->clock_info = NULL;
+	}
+
+	kfree(clock->ptp_info.pin_config);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
new file mode 100644
index 0000000000..bd95b9f8d1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_CLOCK_H__
+#define __LIB_CLOCK_H__
+
+static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev)
+{
+	u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format);
+
+	return (rq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+		rq_ts_format_cap ==
+			MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME);
+}
+
+static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev)
+{
+	u8 sq_ts_format_cap = MLX5_CAP_GEN(mdev, sq_ts_format);
+
+	return (sq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+		sq_ts_format_cap ==
+			MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME);
+}
+
+typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64);
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+void mlx5_init_clock(struct mlx5_core_dev *mdev);
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+	return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+						u64 timestamp)
+{
+	struct mlx5_timer *timer = &clock->timer;
+	unsigned int seq;
+	u64 nsec;
+
+	do {
+		seq = read_seqbegin(&clock->lock);
+		nsec = timecounter_cyc2time(&timer->tc, timestamp);
+	} while (read_seqretry(&clock->lock, seq));
+
+	return ns_to_ktime(nsec);
+}
+
+#define REAL_TIME_TO_NS(hi, low) (((u64)hi) * NSEC_PER_SEC + ((u64)low))
+
+static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
+					      u64 timestamp)
+{
+	u64 time = REAL_TIME_TO_NS(timestamp >> 32, timestamp & 0xFFFFFFFF);
+
+	return ns_to_ktime(time);
+}
+#else
+static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+	return -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+						u64 timestamp)
+{
+	return 0;
+}
+
+static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
+					      u64 timestamp)
+{
+	return 0;
+}
+#endif
+
+static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev)
+{
+	return mlx5_is_real_time_rq(mdev) ? mlx5_real_time_cyc2time :
+					    mlx5_timecounter_cyc2time;
+}
+
+static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev)
+{
+	return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time :
+					    mlx5_timecounter_cyc2time;
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
new file mode 100644
index 0000000000..3a94b8f803
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -0,0 +1,774 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "mlx5_core.h"
+#include "lib/crypto.h"
+
+#define MLX5_CRYPTO_DEK_POOLS_NUM (MLX5_ACCEL_OBJ_TYPE_KEY_NUM - 1)
+#define type2idx(type) ((type) - 1)
+
+#define MLX5_CRYPTO_DEK_POOL_SYNC_THRESH 128
+
+/* calculate the num of DEKs, which are freed by any user
+ * (for example, TLS) after last revalidation in a pool or a bulk.
+ */
+#define MLX5_CRYPTO_DEK_CALC_FREED(a) \
+	({ typeof(a) _a = (a); \
+	   _a->num_deks - _a->avail_deks - _a->in_use_deks; })
+
+#define MLX5_CRYPTO_DEK_POOL_CALC_FREED(pool) MLX5_CRYPTO_DEK_CALC_FREED(pool)
+#define MLX5_CRYPTO_DEK_BULK_CALC_FREED(bulk) MLX5_CRYPTO_DEK_CALC_FREED(bulk)
+
+#define MLX5_CRYPTO_DEK_BULK_IDLE(bulk) \
+	({ typeof(bulk) _bulk = (bulk); \
+	   _bulk->avail_deks == _bulk->num_deks; })
+
+enum {
+	MLX5_CRYPTO_DEK_ALL_TYPE = BIT(0),
+};
+
+struct mlx5_crypto_dek_pool {
+	struct mlx5_core_dev *mdev;
+	u32 key_purpose;
+	int num_deks; /* the total number of keys in this pool */
+	int avail_deks; /* the number of available keys in this pool */
+	int in_use_deks; /* the number of being used keys in this pool */
+	struct mutex lock; /* protect the following lists, and the bulks */
+	struct list_head partial_list; /* some of keys are available */
+	struct list_head full_list; /* no available keys */
+	struct list_head avail_list; /* all keys are available to use */
+
+	/* No in-used keys, and all need to be synced.
+	 * These bulks will be put to avail list after sync.
+	 */
+	struct list_head sync_list;
+
+	bool syncing;
+	struct list_head wait_for_free;
+	struct work_struct sync_work;
+
+	spinlock_t destroy_lock; /* protect destroy_list */
+	struct list_head destroy_list;
+	struct work_struct destroy_work;
+};
+
+struct mlx5_crypto_dek_bulk {
+	struct mlx5_core_dev *mdev;
+	int base_obj_id;
+	int avail_start; /* the bit to start search */
+	int num_deks; /* the total number of keys in a bulk */
+	int avail_deks; /* the number of keys available, with need_sync bit 0 */
+	int in_use_deks; /* the number of keys being used, with in_use bit 1 */
+	struct list_head entry;
+
+	/* 0: not being used by any user, 1: otherwise */
+	unsigned long *in_use;
+
+	/* The bits are set when they are used, and reset after crypto_sync
+	 * is executed. So, the value 0 means the key is newly created, or not
+	 * used after sync, and 1 means it is in use, or freed but not synced
+	 */
+	unsigned long *need_sync;
+};
+
+struct mlx5_crypto_dek_priv {
+	struct mlx5_core_dev *mdev;
+	int log_dek_obj_range;
+};
+
+struct mlx5_crypto_dek {
+	struct mlx5_crypto_dek_bulk *bulk;
+	struct list_head entry;
+	u32 obj_id;
+};
+
+u32 mlx5_crypto_dek_get_id(struct mlx5_crypto_dek *dek)
+{
+	return dek->obj_id;
+}
+
+static int mlx5_crypto_dek_get_key_sz(struct mlx5_core_dev *mdev,
+				      u32 sz_bytes, u8 *key_sz_p)
+{
+	u32 sz_bits = sz_bytes * BITS_PER_BYTE;
+
+	switch (sz_bits) {
+	case 128:
+		*key_sz_p = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128;
+		break;
+	case 256:
+		*key_sz_p = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256;
+		break;
+	default:
+		mlx5_core_err(mdev, "Crypto offload error, invalid key size (%u bits)\n",
+			      sz_bits);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int mlx5_crypto_dek_fill_key(struct mlx5_core_dev *mdev, u8 *key_obj,
+				    const void *key, u32 sz_bytes)
+{
+	void *dst;
+	u8 key_sz;
+	int err;
+
+	err = mlx5_crypto_dek_get_key_sz(mdev, sz_bytes, &key_sz);
+	if (err)
+		return err;
+
+	MLX5_SET(encryption_key_obj, key_obj, key_size, key_sz);
+
+	if (sz_bytes == 16)
+		/* For key size of 128b the MSBs are reserved. */
+		dst = MLX5_ADDR_OF(encryption_key_obj, key_obj, key[1]);
+	else
+		dst = MLX5_ADDR_OF(encryption_key_obj, key_obj, key);
+
+	memcpy(dst, key, sz_bytes);
+
+	return 0;
+}
+
+static int mlx5_crypto_cmd_sync_crypto(struct mlx5_core_dev *mdev,
+				       int crypto_type)
+{
+	u32 in[MLX5_ST_SZ_DW(sync_crypto_in)] = {};
+	int err;
+
+	mlx5_core_dbg(mdev,
+		      "Execute SYNC_CRYPTO command with crypto_type(0x%x)\n",
+		      crypto_type);
+
+	MLX5_SET(sync_crypto_in, in, opcode, MLX5_CMD_OP_SYNC_CRYPTO);
+	MLX5_SET(sync_crypto_in, in, crypto_type, crypto_type);
+
+	err = mlx5_cmd_exec_in(mdev, sync_crypto, in);
+	if (err)
+		mlx5_core_err(mdev,
+			      "Failed to exec sync crypto, type=%d, err=%d\n",
+			      crypto_type, err);
+
+	return err;
+}
+
+static int mlx5_crypto_create_dek_bulk(struct mlx5_core_dev *mdev,
+				       u32 key_purpose, int log_obj_range,
+				       u32 *obj_id)
+{
+	u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	void *obj, *param;
+	int err;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+	param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param);
+	MLX5_SET(general_obj_create_param, param, log_obj_range, log_obj_range);
+
+	obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object);
+	MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose);
+	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	mlx5_core_dbg(mdev, "DEK objects created, bulk=%d, obj_id=%d\n",
+		      1 << log_obj_range, *obj_id);
+
+	return 0;
+}
+
+static int mlx5_crypto_modify_dek_key(struct mlx5_core_dev *mdev,
+				      const void *key, u32 sz_bytes, u32 key_purpose,
+				      u32 obj_id, u32 obj_offset)
+{
+	u32 in[MLX5_ST_SZ_DW(modify_encryption_key_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	void *obj, *param;
+	int err;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+	param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param);
+	MLX5_SET(general_obj_query_param, param, obj_offset, obj_offset);
+
+	obj = MLX5_ADDR_OF(modify_encryption_key_in, in, encryption_key_object);
+	MLX5_SET64(encryption_key_obj, obj, modify_field_select, 1);
+	MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose);
+	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+	err = mlx5_crypto_dek_fill_key(mdev, obj, key, sz_bytes);
+	if (err)
+		return err;
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+
+	/* avoid leaking key on the stack */
+	memzero_explicit(in, sizeof(in));
+
+	return err;
+}
+
+static int mlx5_crypto_create_dek_key(struct mlx5_core_dev *mdev,
+				      const void *key, u32 sz_bytes,
+				      u32 key_purpose, u32 *p_key_id)
+{
+	u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	u64 general_obj_types;
+	void *obj;
+	int err;
+
+	general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+	if (!(general_obj_types &
+	      MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY))
+		return -EINVAL;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+
+	obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object);
+	MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose);
+	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn);
+
+	err = mlx5_crypto_dek_fill_key(mdev, obj, key, sz_bytes);
+	if (err)
+		return err;
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	/* avoid leaking key on the stack */
+	memzero_explicit(in, sizeof(in));
+
+	return err;
+}
+
+static void mlx5_crypto_destroy_dek_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+			       const void *key, u32 sz_bytes,
+			       u32 key_type, u32 *p_key_id)
+{
+	return mlx5_crypto_create_dek_key(mdev, key, sz_bytes, key_type, p_key_id);
+}
+
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+	mlx5_crypto_destroy_dek_key(mdev, key_id);
+}
+
+static struct mlx5_crypto_dek_bulk *
+mlx5_crypto_dek_bulk_create(struct mlx5_crypto_dek_pool *pool)
+{
+	struct mlx5_crypto_dek_priv *dek_priv = pool->mdev->mlx5e_res.dek_priv;
+	struct mlx5_core_dev *mdev = pool->mdev;
+	struct mlx5_crypto_dek_bulk *bulk;
+	int num_deks, base_obj_id;
+	int err;
+
+	bulk = kzalloc(sizeof(*bulk), GFP_KERNEL);
+	if (!bulk)
+		return ERR_PTR(-ENOMEM);
+
+	num_deks = 1 << dek_priv->log_dek_obj_range;
+	bulk->need_sync = bitmap_zalloc(num_deks, GFP_KERNEL);
+	if (!bulk->need_sync) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	bulk->in_use = bitmap_zalloc(num_deks, GFP_KERNEL);
+	if (!bulk->in_use) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	err = mlx5_crypto_create_dek_bulk(mdev, pool->key_purpose,
+					  dek_priv->log_dek_obj_range,
+					  &base_obj_id);
+	if (err)
+		goto err_out;
+
+	bulk->base_obj_id = base_obj_id;
+	bulk->num_deks = num_deks;
+	bulk->avail_deks = num_deks;
+	bulk->mdev = mdev;
+
+	return bulk;
+
+err_out:
+	bitmap_free(bulk->in_use);
+	bitmap_free(bulk->need_sync);
+	kfree(bulk);
+	return ERR_PTR(err);
+}
+
+static struct mlx5_crypto_dek_bulk *
+mlx5_crypto_dek_pool_add_bulk(struct mlx5_crypto_dek_pool *pool)
+{
+	struct mlx5_crypto_dek_bulk *bulk;
+
+	bulk = mlx5_crypto_dek_bulk_create(pool);
+	if (IS_ERR(bulk))
+		return bulk;
+
+	pool->avail_deks += bulk->num_deks;
+	pool->num_deks += bulk->num_deks;
+	list_add(&bulk->entry, &pool->partial_list);
+
+	return bulk;
+}
+
+static void mlx5_crypto_dek_bulk_free(struct mlx5_crypto_dek_bulk *bulk)
+{
+	mlx5_crypto_destroy_dek_key(bulk->mdev, bulk->base_obj_id);
+	bitmap_free(bulk->need_sync);
+	bitmap_free(bulk->in_use);
+	kfree(bulk);
+}
+
+static void mlx5_crypto_dek_pool_remove_bulk(struct mlx5_crypto_dek_pool *pool,
+					     struct mlx5_crypto_dek_bulk *bulk,
+					     bool delay)
+{
+	pool->num_deks -= bulk->num_deks;
+	pool->avail_deks -= bulk->avail_deks;
+	pool->in_use_deks -= bulk->in_use_deks;
+	list_del(&bulk->entry);
+	if (!delay)
+		mlx5_crypto_dek_bulk_free(bulk);
+}
+
+static struct mlx5_crypto_dek_bulk *
+mlx5_crypto_dek_pool_pop(struct mlx5_crypto_dek_pool *pool, u32 *obj_offset)
+{
+	struct mlx5_crypto_dek_bulk *bulk;
+	int pos;
+
+	mutex_lock(&pool->lock);
+	bulk = list_first_entry_or_null(&pool->partial_list,
+					struct mlx5_crypto_dek_bulk, entry);
+
+	if (bulk) {
+		pos = find_next_zero_bit(bulk->need_sync, bulk->num_deks,
+					 bulk->avail_start);
+		if (pos == bulk->num_deks) {
+			mlx5_core_err(pool->mdev, "Wrong DEK bulk avail_start.\n");
+			pos = find_first_zero_bit(bulk->need_sync, bulk->num_deks);
+		}
+		WARN_ON(pos == bulk->num_deks);
+	} else {
+		bulk = list_first_entry_or_null(&pool->avail_list,
+						struct mlx5_crypto_dek_bulk,
+						entry);
+		if (bulk) {
+			list_move(&bulk->entry, &pool->partial_list);
+		} else {
+			bulk = mlx5_crypto_dek_pool_add_bulk(pool);
+			if (IS_ERR(bulk))
+				goto out;
+		}
+		pos = 0;
+	}
+
+	*obj_offset = pos;
+	bitmap_set(bulk->need_sync, pos, 1);
+	bitmap_set(bulk->in_use, pos, 1);
+	bulk->in_use_deks++;
+	bulk->avail_deks--;
+	if (!bulk->avail_deks) {
+		list_move(&bulk->entry, &pool->full_list);
+		bulk->avail_start = bulk->num_deks;
+	} else {
+		bulk->avail_start = pos + 1;
+	}
+	pool->avail_deks--;
+	pool->in_use_deks++;
+
+out:
+	mutex_unlock(&pool->lock);
+	return bulk;
+}
+
+static bool mlx5_crypto_dek_need_sync(struct mlx5_crypto_dek_pool *pool)
+{
+	return !pool->syncing &&
+	       MLX5_CRYPTO_DEK_POOL_CALC_FREED(pool) > MLX5_CRYPTO_DEK_POOL_SYNC_THRESH;
+}
+
+static int mlx5_crypto_dek_free_locked(struct mlx5_crypto_dek_pool *pool,
+				       struct mlx5_crypto_dek *dek)
+{
+	struct mlx5_crypto_dek_bulk *bulk = dek->bulk;
+	int obj_offset;
+	bool old_val;
+	int err = 0;
+
+	obj_offset = dek->obj_id - bulk->base_obj_id;
+	old_val = test_and_clear_bit(obj_offset, bulk->in_use);
+	WARN_ON_ONCE(!old_val);
+	if (!old_val) {
+		err = -ENOENT;
+		goto out_free;
+	}
+	pool->in_use_deks--;
+	bulk->in_use_deks--;
+	if (!bulk->avail_deks && !bulk->in_use_deks)
+		list_move(&bulk->entry, &pool->sync_list);
+
+	if (mlx5_crypto_dek_need_sync(pool) && schedule_work(&pool->sync_work))
+		pool->syncing = true;
+
+out_free:
+	kfree(dek);
+	return err;
+}
+
+static int mlx5_crypto_dek_pool_push(struct mlx5_crypto_dek_pool *pool,
+				     struct mlx5_crypto_dek *dek)
+{
+	int err = 0;
+
+	mutex_lock(&pool->lock);
+	if (pool->syncing)
+		list_add(&dek->entry, &pool->wait_for_free);
+	else
+		err = mlx5_crypto_dek_free_locked(pool, dek);
+	mutex_unlock(&pool->lock);
+
+	return err;
+}
+
+/* Update the bits for a bulk while sync, and avail_next for search.
+ * As the combinations of (need_sync, in_use) of one DEK are
+ *    - (0,0) means the key is ready for use,
+ *    - (1,1) means the key is currently being used by a user,
+ *    - (1,0) means the key is freed, and waiting for being synced,
+ *    - (0,1) is invalid state.
+ * the number of revalidated DEKs can be calculated by
+ * hweight_long(need_sync XOR in_use), and the need_sync bits can be reset
+ * by simply copying from in_use bits.
+ */
+static void mlx5_crypto_dek_bulk_reset_synced(struct mlx5_crypto_dek_pool *pool,
+					      struct mlx5_crypto_dek_bulk *bulk)
+{
+	unsigned long *need_sync = bulk->need_sync;
+	unsigned long *in_use = bulk->in_use;
+	int i, freed, reused, avail_next;
+	bool first = true;
+
+	freed = MLX5_CRYPTO_DEK_BULK_CALC_FREED(bulk);
+
+	for (i = 0; freed && i < BITS_TO_LONGS(bulk->num_deks);
+			i++, need_sync++, in_use++) {
+		reused = hweight_long((*need_sync) ^ (*in_use));
+		if (!reused)
+			continue;
+
+		bulk->avail_deks += reused;
+		pool->avail_deks += reused;
+		*need_sync = *in_use;
+		if (first) {
+			avail_next = i * BITS_PER_TYPE(long);
+			if (bulk->avail_start > avail_next)
+				bulk->avail_start = avail_next;
+			first = false;
+		}
+
+		freed -= reused;
+	}
+}
+
+/* Return true if the bulk is reused, false if destroyed with delay */
+static bool mlx5_crypto_dek_bulk_handle_avail(struct mlx5_crypto_dek_pool *pool,
+					      struct mlx5_crypto_dek_bulk *bulk,
+					      struct list_head *destroy_list)
+{
+	if (list_empty(&pool->avail_list)) {
+		list_move(&bulk->entry, &pool->avail_list);
+		return true;
+	}
+
+	mlx5_crypto_dek_pool_remove_bulk(pool, bulk, true);
+	list_add(&bulk->entry, destroy_list);
+	return false;
+}
+
+static void mlx5_crypto_dek_pool_splice_destroy_list(struct mlx5_crypto_dek_pool *pool,
+						     struct list_head *list,
+						     struct list_head *head)
+{
+	spin_lock(&pool->destroy_lock);
+	list_splice_init(list, head);
+	spin_unlock(&pool->destroy_lock);
+}
+
+static void mlx5_crypto_dek_pool_free_wait_keys(struct mlx5_crypto_dek_pool *pool)
+{
+	struct mlx5_crypto_dek *dek, *next;
+
+	list_for_each_entry_safe(dek, next, &pool->wait_for_free, entry) {
+		list_del(&dek->entry);
+		mlx5_crypto_dek_free_locked(pool, dek);
+	}
+}
+
+/* For all the bulks in each list, reset the bits while sync.
+ * Move them to different lists according to the number of available DEKs.
+ * Destrory all the idle bulks, except one for quick service.
+ * And free DEKs in the waiting list at the end of this func.
+ */
+static void mlx5_crypto_dek_pool_reset_synced(struct mlx5_crypto_dek_pool *pool)
+{
+	struct mlx5_crypto_dek_bulk *bulk, *tmp;
+	LIST_HEAD(destroy_list);
+
+	list_for_each_entry_safe(bulk, tmp, &pool->partial_list, entry) {
+		mlx5_crypto_dek_bulk_reset_synced(pool, bulk);
+		if (MLX5_CRYPTO_DEK_BULK_IDLE(bulk))
+			mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list);
+	}
+
+	list_for_each_entry_safe(bulk, tmp, &pool->full_list, entry) {
+		mlx5_crypto_dek_bulk_reset_synced(pool, bulk);
+
+		if (!bulk->avail_deks)
+			continue;
+
+		if (MLX5_CRYPTO_DEK_BULK_IDLE(bulk))
+			mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list);
+		else
+			list_move(&bulk->entry, &pool->partial_list);
+	}
+
+	list_for_each_entry_safe(bulk, tmp, &pool->sync_list, entry) {
+		bulk->avail_deks = bulk->num_deks;
+		pool->avail_deks += bulk->num_deks;
+		if (mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list)) {
+			memset(bulk->need_sync, 0, BITS_TO_BYTES(bulk->num_deks));
+			bulk->avail_start = 0;
+		}
+	}
+
+	mlx5_crypto_dek_pool_free_wait_keys(pool);
+
+	if (!list_empty(&destroy_list)) {
+		mlx5_crypto_dek_pool_splice_destroy_list(pool, &destroy_list,
+							 &pool->destroy_list);
+		schedule_work(&pool->destroy_work);
+	}
+}
+
+static void mlx5_crypto_dek_sync_work_fn(struct work_struct *work)
+{
+	struct mlx5_crypto_dek_pool *pool =
+		container_of(work, struct mlx5_crypto_dek_pool, sync_work);
+	int err;
+
+	err = mlx5_crypto_cmd_sync_crypto(pool->mdev, BIT(pool->key_purpose));
+	mutex_lock(&pool->lock);
+	if (!err)
+		mlx5_crypto_dek_pool_reset_synced(pool);
+	pool->syncing = false;
+	mutex_unlock(&pool->lock);
+}
+
+struct mlx5_crypto_dek *mlx5_crypto_dek_create(struct mlx5_crypto_dek_pool *dek_pool,
+					       const void *key, u32 sz_bytes)
+{
+	struct mlx5_crypto_dek_priv *dek_priv = dek_pool->mdev->mlx5e_res.dek_priv;
+	struct mlx5_core_dev *mdev = dek_pool->mdev;
+	u32 key_purpose = dek_pool->key_purpose;
+	struct mlx5_crypto_dek_bulk *bulk;
+	struct mlx5_crypto_dek *dek;
+	int obj_offset;
+	int err;
+
+	dek = kzalloc(sizeof(*dek), GFP_KERNEL);
+	if (!dek)
+		return ERR_PTR(-ENOMEM);
+
+	if (!dek_priv) {
+		err = mlx5_crypto_create_dek_key(mdev, key, sz_bytes,
+						 key_purpose, &dek->obj_id);
+		goto out;
+	}
+
+	bulk = mlx5_crypto_dek_pool_pop(dek_pool, &obj_offset);
+	if (IS_ERR(bulk)) {
+		err = PTR_ERR(bulk);
+		goto out;
+	}
+
+	dek->bulk = bulk;
+	dek->obj_id = bulk->base_obj_id + obj_offset;
+	err = mlx5_crypto_modify_dek_key(mdev, key, sz_bytes, key_purpose,
+					 bulk->base_obj_id, obj_offset);
+	if (err) {
+		mlx5_crypto_dek_pool_push(dek_pool, dek);
+		return ERR_PTR(err);
+	}
+
+out:
+	if (err) {
+		kfree(dek);
+		return ERR_PTR(err);
+	}
+
+	return dek;
+}
+
+void mlx5_crypto_dek_destroy(struct mlx5_crypto_dek_pool *dek_pool,
+			     struct mlx5_crypto_dek *dek)
+{
+	struct mlx5_crypto_dek_priv *dek_priv = dek_pool->mdev->mlx5e_res.dek_priv;
+	struct mlx5_core_dev *mdev = dek_pool->mdev;
+
+	if (!dek_priv) {
+		mlx5_crypto_destroy_dek_key(mdev, dek->obj_id);
+		kfree(dek);
+	} else {
+		mlx5_crypto_dek_pool_push(dek_pool, dek);
+	}
+}
+
+static void mlx5_crypto_dek_free_destroy_list(struct list_head *destroy_list)
+{
+	struct mlx5_crypto_dek_bulk *bulk, *tmp;
+
+	list_for_each_entry_safe(bulk, tmp, destroy_list, entry)
+		mlx5_crypto_dek_bulk_free(bulk);
+}
+
+static void mlx5_crypto_dek_destroy_work_fn(struct work_struct *work)
+{
+	struct mlx5_crypto_dek_pool *pool =
+		container_of(work, struct mlx5_crypto_dek_pool, destroy_work);
+	LIST_HEAD(destroy_list);
+
+	mlx5_crypto_dek_pool_splice_destroy_list(pool, &pool->destroy_list,
+						 &destroy_list);
+	mlx5_crypto_dek_free_destroy_list(&destroy_list);
+}
+
+struct mlx5_crypto_dek_pool *
+mlx5_crypto_dek_pool_create(struct mlx5_core_dev *mdev, int key_purpose)
+{
+	struct mlx5_crypto_dek_pool *pool;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	pool->mdev = mdev;
+	pool->key_purpose = key_purpose;
+
+	mutex_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->avail_list);
+	INIT_LIST_HEAD(&pool->partial_list);
+	INIT_LIST_HEAD(&pool->full_list);
+	INIT_LIST_HEAD(&pool->sync_list);
+	INIT_LIST_HEAD(&pool->wait_for_free);
+	INIT_WORK(&pool->sync_work, mlx5_crypto_dek_sync_work_fn);
+	spin_lock_init(&pool->destroy_lock);
+	INIT_LIST_HEAD(&pool->destroy_list);
+	INIT_WORK(&pool->destroy_work, mlx5_crypto_dek_destroy_work_fn);
+
+	return pool;
+}
+
+void mlx5_crypto_dek_pool_destroy(struct mlx5_crypto_dek_pool *pool)
+{
+	struct mlx5_crypto_dek_bulk *bulk, *tmp;
+
+	cancel_work_sync(&pool->sync_work);
+	cancel_work_sync(&pool->destroy_work);
+
+	mlx5_crypto_dek_pool_free_wait_keys(pool);
+
+	list_for_each_entry_safe(bulk, tmp, &pool->avail_list, entry)
+		mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false);
+
+	list_for_each_entry_safe(bulk, tmp, &pool->full_list, entry)
+		mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false);
+
+	list_for_each_entry_safe(bulk, tmp, &pool->sync_list, entry)
+		mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false);
+
+	list_for_each_entry_safe(bulk, tmp, &pool->partial_list, entry)
+		mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false);
+
+	mlx5_crypto_dek_free_destroy_list(&pool->destroy_list);
+
+	mutex_destroy(&pool->lock);
+
+	kfree(pool);
+}
+
+void mlx5_crypto_dek_cleanup(struct mlx5_crypto_dek_priv *dek_priv)
+{
+	if (!dek_priv)
+		return;
+
+	kfree(dek_priv);
+}
+
+struct mlx5_crypto_dek_priv *mlx5_crypto_dek_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_crypto_dek_priv *dek_priv;
+	int err;
+
+	if (!MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc))
+		return NULL;
+
+	dek_priv = kzalloc(sizeof(*dek_priv), GFP_KERNEL);
+	if (!dek_priv)
+		return ERR_PTR(-ENOMEM);
+
+	dek_priv->mdev = mdev;
+	dek_priv->log_dek_obj_range = min_t(int, 12,
+					    MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc));
+
+	/* sync all types of objects */
+	err = mlx5_crypto_cmd_sync_crypto(mdev, MLX5_CRYPTO_DEK_ALL_TYPE);
+	if (err)
+		goto err_sync_crypto;
+
+	mlx5_core_dbg(mdev, "Crypto DEK enabled, %d deks per alloc (max %d), total %d\n",
+		      1 << dek_priv->log_dek_obj_range,
+		      1 << MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc),
+		      1 << MLX5_CAP_CRYPTO(mdev, log_max_num_deks));
+
+	return dek_priv;
+
+err_sync_crypto:
+	kfree(dek_priv);
+	return ERR_PTR(err);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h
new file mode 100644
index 0000000000..c819c047bb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LIB_CRYPTO_H__
+#define __MLX5_LIB_CRYPTO_H__
+
+enum {
+	MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS,
+	MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC,
+	MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC,
+	MLX5_ACCEL_OBJ_TYPE_KEY_NUM,
+};
+
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+			       const void *key, u32 sz_bytes,
+			       u32 key_type, u32 *p_key_id);
+
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id);
+
+struct mlx5_crypto_dek_pool;
+struct mlx5_crypto_dek;
+
+struct mlx5_crypto_dek_pool *mlx5_crypto_dek_pool_create(struct mlx5_core_dev *mdev,
+							 int key_purpose);
+void mlx5_crypto_dek_pool_destroy(struct mlx5_crypto_dek_pool *pool);
+struct mlx5_crypto_dek *mlx5_crypto_dek_create(struct mlx5_crypto_dek_pool *dek_pool,
+					       const void *key, u32 sz_bytes);
+void mlx5_crypto_dek_destroy(struct mlx5_crypto_dek_pool *dek_pool,
+			     struct mlx5_crypto_dek *dek);
+u32 mlx5_crypto_dek_get_id(struct mlx5_crypto_dek *dek);
+
+struct mlx5_crypto_dek_priv *mlx5_crypto_dek_init(struct mlx5_core_dev *mdev);
+void mlx5_crypto_dek_cleanup(struct mlx5_crypto_dek_priv *dek_priv);
+#endif /* __MLX5_LIB_CRYPTO_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
new file mode 100644
index 0000000000..00e67910e3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include <linux/list.h>
+#include "lib/devcom.h"
+#include "mlx5_core.h"
+
+static LIST_HEAD(devcom_dev_list);
+static LIST_HEAD(devcom_comp_list);
+/* protect device list */
+static DEFINE_MUTEX(dev_list_lock);
+/* protect component list */
+static DEFINE_MUTEX(comp_list_lock);
+
+#define devcom_for_each_component(iter) \
+	list_for_each_entry(iter, &devcom_comp_list, comp_list)
+
+struct mlx5_devcom_dev {
+	struct list_head list;
+	struct mlx5_core_dev *dev;
+	struct kref ref;
+};
+
+struct mlx5_devcom_comp {
+	struct list_head comp_list;
+	enum mlx5_devcom_component id;
+	u64 key;
+	struct list_head comp_dev_list_head;
+	mlx5_devcom_event_handler_t handler;
+	struct kref ref;
+	bool ready;
+	struct rw_semaphore sem;
+};
+
+struct mlx5_devcom_comp_dev {
+	struct list_head list;
+	struct mlx5_devcom_comp *comp;
+	struct mlx5_devcom_dev *devc;
+	void __rcu *data;
+};
+
+static bool devcom_dev_exists(struct mlx5_core_dev *dev)
+{
+	struct mlx5_devcom_dev *iter;
+
+	list_for_each_entry(iter, &devcom_dev_list, list)
+		if (iter->dev == dev)
+			return true;
+
+	return false;
+}
+
+static struct mlx5_devcom_dev *
+mlx5_devcom_dev_alloc(struct mlx5_core_dev *dev)
+{
+	struct mlx5_devcom_dev *devc;
+
+	devc = kzalloc(sizeof(*devc), GFP_KERNEL);
+	if (!devc)
+		return NULL;
+
+	devc->dev = dev;
+	kref_init(&devc->ref);
+	return devc;
+}
+
+struct mlx5_devcom_dev *
+mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+{
+	struct mlx5_devcom_dev *devc;
+
+	mutex_lock(&dev_list_lock);
+
+	if (devcom_dev_exists(dev)) {
+		devc = ERR_PTR(-EEXIST);
+		goto out;
+	}
+
+	devc = mlx5_devcom_dev_alloc(dev);
+	if (!devc) {
+		devc = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	list_add_tail(&devc->list, &devcom_dev_list);
+out:
+	mutex_unlock(&dev_list_lock);
+	return devc;
+}
+
+static void
+mlx5_devcom_dev_release(struct kref *ref)
+{
+	struct mlx5_devcom_dev *devc = container_of(ref, struct mlx5_devcom_dev, ref);
+
+	mutex_lock(&dev_list_lock);
+	list_del(&devc->list);
+	mutex_unlock(&dev_list_lock);
+	kfree(devc);
+}
+
+void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc)
+{
+	if (!IS_ERR_OR_NULL(devc))
+		kref_put(&devc->ref, mlx5_devcom_dev_release);
+}
+
+static struct mlx5_devcom_comp *
+mlx5_devcom_comp_alloc(u64 id, u64 key, mlx5_devcom_event_handler_t handler)
+{
+	struct mlx5_devcom_comp *comp;
+
+	comp = kzalloc(sizeof(*comp), GFP_KERNEL);
+	if (!comp)
+		return ERR_PTR(-ENOMEM);
+
+	comp->id = id;
+	comp->key = key;
+	comp->handler = handler;
+	init_rwsem(&comp->sem);
+	kref_init(&comp->ref);
+	INIT_LIST_HEAD(&comp->comp_dev_list_head);
+
+	return comp;
+}
+
+static void
+mlx5_devcom_comp_release(struct kref *ref)
+{
+	struct mlx5_devcom_comp *comp = container_of(ref, struct mlx5_devcom_comp, ref);
+
+	mutex_lock(&comp_list_lock);
+	list_del(&comp->comp_list);
+	mutex_unlock(&comp_list_lock);
+	kfree(comp);
+}
+
+static struct mlx5_devcom_comp_dev *
+devcom_alloc_comp_dev(struct mlx5_devcom_dev *devc,
+		      struct mlx5_devcom_comp *comp,
+		      void *data)
+{
+	struct mlx5_devcom_comp_dev *devcom;
+
+	devcom = kzalloc(sizeof(*devcom), GFP_KERNEL);
+	if (!devcom)
+		return ERR_PTR(-ENOMEM);
+
+	kref_get(&devc->ref);
+	devcom->devc = devc;
+	devcom->comp = comp;
+	rcu_assign_pointer(devcom->data, data);
+
+	down_write(&comp->sem);
+	list_add_tail(&devcom->list, &comp->comp_dev_list_head);
+	up_write(&comp->sem);
+
+	return devcom;
+}
+
+static void
+devcom_free_comp_dev(struct mlx5_devcom_comp_dev *devcom)
+{
+	struct mlx5_devcom_comp *comp = devcom->comp;
+
+	down_write(&comp->sem);
+	list_del(&devcom->list);
+	up_write(&comp->sem);
+
+	kref_put(&devcom->devc->ref, mlx5_devcom_dev_release);
+	kfree(devcom);
+	kref_put(&comp->ref, mlx5_devcom_comp_release);
+}
+
+static bool
+devcom_component_equal(struct mlx5_devcom_comp *devcom,
+		       enum mlx5_devcom_component id,
+		       u64 key)
+{
+	return devcom->id == id && devcom->key == key;
+}
+
+static struct mlx5_devcom_comp *
+devcom_component_get(struct mlx5_devcom_dev *devc,
+		     enum mlx5_devcom_component id,
+		     u64 key,
+		     mlx5_devcom_event_handler_t handler)
+{
+	struct mlx5_devcom_comp *comp;
+
+	devcom_for_each_component(comp) {
+		if (devcom_component_equal(comp, id, key)) {
+			if (handler == comp->handler) {
+				kref_get(&comp->ref);
+				return comp;
+			}
+
+			mlx5_core_err(devc->dev,
+				      "Cannot register existing devcom component with different handler\n");
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	return NULL;
+}
+
+struct mlx5_devcom_comp_dev *
+mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
+			       enum mlx5_devcom_component id,
+			       u64 key,
+			       mlx5_devcom_event_handler_t handler,
+			       void *data)
+{
+	struct mlx5_devcom_comp_dev *devcom;
+	struct mlx5_devcom_comp *comp;
+
+	if (IS_ERR_OR_NULL(devc))
+		return NULL;
+
+	mutex_lock(&comp_list_lock);
+	comp = devcom_component_get(devc, id, key, handler);
+	if (IS_ERR(comp)) {
+		devcom = ERR_PTR(-EINVAL);
+		goto out_unlock;
+	}
+
+	if (!comp) {
+		comp = mlx5_devcom_comp_alloc(id, key, handler);
+		if (IS_ERR(comp)) {
+			devcom = ERR_CAST(comp);
+			goto out_unlock;
+		}
+		list_add_tail(&comp->comp_list, &devcom_comp_list);
+	}
+	mutex_unlock(&comp_list_lock);
+
+	devcom = devcom_alloc_comp_dev(devc, comp, data);
+	if (IS_ERR(devcom))
+		kref_put(&comp->ref, mlx5_devcom_comp_release);
+
+	return devcom;
+
+out_unlock:
+	mutex_unlock(&comp_list_lock);
+	return devcom;
+}
+
+void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom)
+{
+	if (!IS_ERR_OR_NULL(devcom))
+		devcom_free_comp_dev(devcom);
+}
+
+int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom,
+			   int event, int rollback_event,
+			   void *event_data)
+{
+	struct mlx5_devcom_comp_dev *pos;
+	struct mlx5_devcom_comp *comp;
+	int err = 0;
+	void *data;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return -ENODEV;
+
+	comp = devcom->comp;
+	down_write(&comp->sem);
+	list_for_each_entry(pos, &comp->comp_dev_list_head, list) {
+		data = rcu_dereference_protected(pos->data, lockdep_is_held(&comp->sem));
+
+		if (pos != devcom && data) {
+			err = comp->handler(event, data, event_data);
+			if (err)
+				goto rollback;
+		}
+	}
+
+	up_write(&comp->sem);
+	return 0;
+
+rollback:
+	if (list_entry_is_head(pos, &comp->comp_dev_list_head, list))
+		goto out;
+	pos = list_prev_entry(pos, list);
+	list_for_each_entry_from_reverse(pos, &comp->comp_dev_list_head, list) {
+		data = rcu_dereference_protected(pos->data, lockdep_is_held(&comp->sem));
+
+		if (pos != devcom && data)
+			comp->handler(rollback_event, data, event_data);
+	}
+out:
+	up_write(&comp->sem);
+	return err;
+}
+
+void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready)
+{
+	WARN_ON(!rwsem_is_locked(&devcom->comp->sem));
+
+	WRITE_ONCE(devcom->comp->ready, ready);
+}
+
+bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom)
+{
+	if (IS_ERR_OR_NULL(devcom))
+		return false;
+
+	return READ_ONCE(devcom->comp->ready);
+}
+
+bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom)
+{
+	struct mlx5_devcom_comp *comp;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return false;
+
+	comp = devcom->comp;
+	down_read(&comp->sem);
+	if (!READ_ONCE(comp->ready)) {
+		up_read(&comp->sem);
+		return false;
+	}
+
+	return true;
+}
+
+void mlx5_devcom_for_each_peer_end(struct mlx5_devcom_comp_dev *devcom)
+{
+	up_read(&devcom->comp->sem);
+}
+
+void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom_comp_dev *devcom,
+				     struct mlx5_devcom_comp_dev **pos)
+{
+	struct mlx5_devcom_comp *comp = devcom->comp;
+	struct mlx5_devcom_comp_dev *tmp;
+	void *data;
+
+	tmp = list_prepare_entry(*pos, &comp->comp_dev_list_head, list);
+
+	list_for_each_entry_continue(tmp, &comp->comp_dev_list_head, list) {
+		if (tmp != devcom) {
+			data = rcu_dereference_protected(tmp->data, lockdep_is_held(&comp->sem));
+			if (data)
+				break;
+		}
+	}
+
+	if (list_entry_is_head(tmp, &comp->comp_dev_list_head, list))
+		return NULL;
+
+	*pos = tmp;
+	return data;
+}
+
+void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom,
+					 struct mlx5_devcom_comp_dev **pos)
+{
+	struct mlx5_devcom_comp *comp = devcom->comp;
+	struct mlx5_devcom_comp_dev *tmp;
+	void *data;
+
+	tmp = list_prepare_entry(*pos, &comp->comp_dev_list_head, list);
+
+	list_for_each_entry_continue(tmp, &comp->comp_dev_list_head, list) {
+		if (tmp != devcom) {
+			/* This can change concurrently, however 'data' pointer will remain
+			 * valid for the duration of RCU read section.
+			 */
+			if (!READ_ONCE(comp->ready))
+				return NULL;
+			data = rcu_dereference(tmp->data);
+			if (data)
+				break;
+		}
+	}
+
+	if (list_entry_is_head(tmp, &comp->comp_dev_list_head, list))
+		return NULL;
+
+	*pos = tmp;
+	return data;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
new file mode 100644
index 0000000000..8389ac0af7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_DEVCOM_H__
+#define __LIB_MLX5_DEVCOM_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_devcom_component {
+	MLX5_DEVCOM_ESW_OFFLOADS,
+	MLX5_DEVCOM_NUM_COMPONENTS,
+};
+
+typedef int (*mlx5_devcom_event_handler_t)(int event,
+					   void *my_data,
+					   void *event_data);
+
+struct mlx5_devcom_dev *mlx5_devcom_register_device(struct mlx5_core_dev *dev);
+void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc);
+
+struct mlx5_devcom_comp_dev *
+mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
+			       enum mlx5_devcom_component id,
+			       u64 key,
+			       mlx5_devcom_event_handler_t handler,
+			       void *data);
+void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom);
+
+int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom,
+			   int event, int rollback_event,
+			   void *event_data);
+
+void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready);
+bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom);
+
+bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom);
+void mlx5_devcom_for_each_peer_end(struct mlx5_devcom_comp_dev *devcom);
+void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom_comp_dev *devcom,
+				     struct mlx5_devcom_comp_dev **pos);
+
+#define mlx5_devcom_for_each_peer_entry(devcom, data, pos)                    \
+	for (pos = NULL, data = mlx5_devcom_get_next_peer_data(devcom, &pos); \
+	     data;                                                            \
+	     data = mlx5_devcom_get_next_peer_data(devcom, &pos))
+
+void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom,
+					 struct mlx5_devcom_comp_dev **pos);
+
+#define mlx5_devcom_for_each_peer_entry_rcu(devcom, data, pos)                    \
+	for (pos = NULL, data = mlx5_devcom_get_next_peer_data_rcu(devcom, &pos); \
+	     data;								  \
+	     data = mlx5_devcom_get_next_peer_data_rcu(devcom, &pos))
+
+#endif /* __LIB_MLX5_DEVCOM_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
new file mode 100644
index 0000000000..9482e51ac8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+struct mlx5_dm {
+	/* protect access to icm bitmask */
+	spinlock_t lock;
+	unsigned long *steering_sw_icm_alloc_blocks;
+	unsigned long *header_modify_sw_icm_alloc_blocks;
+	unsigned long *header_modify_pattern_sw_icm_alloc_blocks;
+};
+
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
+{
+	u64 header_modify_pattern_icm_blocks = 0;
+	u64 header_modify_icm_blocks = 0;
+	u64 steering_icm_blocks = 0;
+	struct mlx5_dm *dm;
+	bool support_v2;
+
+	if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
+		return NULL;
+
+	dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+	if (!dm)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&dm->lock);
+
+	if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) {
+		steering_icm_blocks =
+			BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+		dm->steering_sw_icm_alloc_blocks =
+			bitmap_zalloc(steering_icm_blocks, GFP_KERNEL);
+		if (!dm->steering_sw_icm_alloc_blocks)
+			goto err_steering;
+	}
+
+	if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) {
+		header_modify_icm_blocks =
+			BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+		dm->header_modify_sw_icm_alloc_blocks =
+			bitmap_zalloc(header_modify_icm_blocks, GFP_KERNEL);
+		if (!dm->header_modify_sw_icm_alloc_blocks)
+			goto err_modify_hdr;
+	}
+
+	support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) &&
+		     MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) &&
+		     MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address);
+
+	if (support_v2) {
+		header_modify_pattern_icm_blocks =
+			BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+		dm->header_modify_pattern_sw_icm_alloc_blocks =
+			bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL);
+		if (!dm->header_modify_pattern_sw_icm_alloc_blocks)
+			goto err_pattern;
+	}
+
+	return dm;
+
+err_pattern:
+	bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+
+err_modify_hdr:
+	bitmap_free(dm->steering_sw_icm_alloc_blocks);
+
+err_steering:
+	kfree(dm);
+
+	return ERR_PTR(-ENOMEM);
+}
+
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_dm *dm = dev->dm;
+
+	if (!dev->dm)
+		return;
+
+	if (dm->steering_sw_icm_alloc_blocks) {
+		WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks,
+				      BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+					  MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+		bitmap_free(dm->steering_sw_icm_alloc_blocks);
+	}
+
+	if (dm->header_modify_sw_icm_alloc_blocks) {
+		WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks,
+				      BIT(MLX5_CAP_DEV_MEM(dev,
+							   log_header_modify_sw_icm_size) -
+				      MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+		bitmap_free(dm->header_modify_sw_icm_alloc_blocks);
+	}
+
+	if (dm->header_modify_pattern_sw_icm_alloc_blocks) {
+		WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks,
+				      BIT(MLX5_CAP_DEV_MEM(dev,
+							   log_header_modify_pattern_sw_icm_size) -
+					  MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+		bitmap_free(dm->header_modify_pattern_sw_icm_alloc_blocks);
+	}
+
+	kfree(dm);
+}
+
+int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			 u64 length, u32 log_alignment, u16 uid,
+			 phys_addr_t *addr, u32 *obj_id)
+{
+	u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
+	struct mlx5_dm *dm = dev->dm;
+	unsigned long *block_map;
+	u64 icm_start_addr;
+	u32 log_icm_size;
+	u64 align_mask;
+	u32 max_blocks;
+	u64 block_idx;
+	void *sw_icm;
+	int ret;
+
+	if (!dev->dm)
+		return -EOPNOTSUPP;
+
+	if (!length || (length & (length - 1)) ||
+	    length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1))
+		return -EINVAL;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	switch (type) {
+	case MLX5_SW_ICM_TYPE_STEERING:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev,
+						log_header_modify_sw_icm_size);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+						    header_modify_pattern_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev,
+						log_header_modify_pattern_sw_icm_size);
+		block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!block_map)
+		return -EOPNOTSUPP;
+
+	max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+	if (log_alignment < MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+		log_alignment = MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	align_mask = BIT(log_alignment - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) - 1;
+
+	spin_lock(&dm->lock);
+	block_idx = bitmap_find_next_zero_area(block_map, max_blocks, 0,
+					       num_blocks, align_mask);
+
+	if (block_idx < max_blocks)
+		bitmap_set(block_map,
+			   block_idx, num_blocks);
+
+	spin_unlock(&dm->lock);
+
+	if (block_idx >= max_blocks)
+		return -ENOMEM;
+
+	sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
+	icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
+		   icm_start_addr);
+	MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret) {
+		spin_lock(&dm->lock);
+		bitmap_clear(block_map,
+			     block_idx, num_blocks);
+		spin_unlock(&dm->lock);
+
+		return ret;
+	}
+
+	*addr = icm_start_addr;
+	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc);
+
+int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			   u64 length, u16 uid, phys_addr_t addr, u32 obj_id)
+{
+	u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	struct mlx5_dm *dm = dev->dm;
+	unsigned long *block_map;
+	u64 icm_start_addr;
+	u64 start_idx;
+	int err;
+
+	if (!dev->dm)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case MLX5_SW_ICM_TYPE_STEERING:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+						    header_modify_pattern_sw_icm_start_address);
+		block_map = dm->header_modify_pattern_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	spin_lock(&dm->lock);
+	bitmap_clear(block_map,
+		     start_idx, num_blocks);
+	spin_unlock(&dm->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644
index 0000000000..69a7545977
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018-2021, Mellanox Technologies inc.  All rights reserved. */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_EQE_SIZE       (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+	struct list_head      list;
+	struct list_head      process_list;
+	struct tasklet_struct task;
+	spinlock_t            lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+	spinlock_t              lock;	/* protect radix tree */
+	struct radix_tree_root  tree;
+};
+
+struct mlx5_eq {
+	struct mlx5_frag_buf_ctrl fbc;
+	struct mlx5_frag_buf    frag_buf;
+	struct mlx5_core_dev    *dev;
+	struct mlx5_cq_table    cq_table;
+	__be32 __iomem	        *doorbell;
+	u32                     cons_index;
+	unsigned int            vecidx;
+	unsigned int            irqn;
+	u8                      eqn;
+	struct mlx5_rsc_debug   *dbg;
+	struct mlx5_irq         *irq;
+};
+
+struct mlx5_eq_async {
+	struct mlx5_eq          core;
+	struct notifier_block   irq_nb;
+	spinlock_t              lock; /* To avoid irq EQ handle races with resiliency flows */
+};
+
+struct mlx5_eq_comp {
+	struct mlx5_eq          core;
+	struct notifier_block   irq_nb;
+	struct mlx5_eq_tasklet  tasklet_ctx;
+	struct list_head        list;
+};
+
+static inline u32 eq_get_size(struct mlx5_eq *eq)
+{
+	return eq->fbc.sz_m1 + 1;
+}
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+	return mlx5_frag_buf_get_wqe(&eq->fbc, entry);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1);
+
+	return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+	u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+	__raw_writel((__force u32)cpu_to_be32(val), addr);
+	/* We still want ordering, just not swabbing, so add a barrier */
+	mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h
new file mode 100644
index 0000000000..a0f7faea31
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __LIB_EVENTS_H__
+#define __LIB_EVENTS_H__
+
+#include "mlx5_core.h"
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK    0xF
+
+enum port_module_event_status_type {
+	MLX5_MODULE_STATUS_PLUGGED   = 0x1,
+	MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+	MLX5_MODULE_STATUS_ERROR     = 0x3,
+	MLX5_MODULE_STATUS_DISABLED  = 0x4,
+	MLX5_MODULE_STATUS_NUM,
+};
+
+enum  port_module_event_error_type {
+	MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED    = 0x0,
+	MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX  = 0x1,
+	MLX5_MODULE_EVENT_ERROR_BUS_STUCK                = 0x2,
+	MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT  = 0x3,
+	MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
+	MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER       = 0x5,
+	MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE         = 0x6,
+	MLX5_MODULE_EVENT_ERROR_BAD_CABLE                = 0x7,
+	MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc,
+	MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+	u64 status_counters[MLX5_MODULE_STATUS_NUM];
+	u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
new file mode 100644
index 0000000000..a80ecb672f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies.
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/fs.h>
+
+#include "lib/fs_chains.h"
+#include "fs_ft_pool.h"
+#include "en/mapping.h"
+#include "fs_core.h"
+#include "en_tc.h"
+
+#define chains_lock(chains) ((chains)->lock)
+#define chains_ht(chains) ((chains)->chains_ht)
+#define prios_ht(chains) ((chains)->prios_ht)
+#define chains_default_ft(chains) ((chains)->chains_default_ft)
+#define chains_end_ft(chains) ((chains)->chains_end_ft)
+#define FT_TBL_SZ (64 * 1024)
+
+struct mlx5_fs_chains {
+	struct mlx5_core_dev *dev;
+
+	struct rhashtable chains_ht;
+	struct rhashtable prios_ht;
+	/* Protects above chains_ht and prios_ht */
+	struct mutex lock;
+
+	struct mlx5_flow_table *chains_default_ft;
+	struct mlx5_flow_table *chains_end_ft;
+	struct mapping_ctx *chains_mapping;
+
+	enum mlx5_flow_namespace_type ns;
+	u32 group_num;
+	u32 flags;
+	int fs_base_prio;
+	int fs_base_level;
+};
+
+struct fs_chain {
+	struct rhash_head node;
+
+	u32 chain;
+
+	int ref;
+	int id;
+
+	struct mlx5_fs_chains *chains;
+	struct list_head prios_list;
+	struct mlx5_flow_handle *restore_rule;
+	struct mlx5_modify_hdr *miss_modify_hdr;
+};
+
+struct prio_key {
+	u32 chain;
+	u32 prio;
+	u32 level;
+};
+
+struct prio {
+	struct rhash_head node;
+	struct list_head list;
+
+	struct prio_key key;
+
+	int ref;
+
+	struct fs_chain *chain;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_table *next_ft;
+	struct mlx5_flow_group *miss_group;
+	struct mlx5_flow_handle *miss_rule;
+};
+
+static const struct rhashtable_params chain_params = {
+	.head_offset = offsetof(struct fs_chain, node),
+	.key_offset = offsetof(struct fs_chain, chain),
+	.key_len = sizeof_field(struct fs_chain, chain),
+	.automatic_shrinking = true,
+};
+
+static const struct rhashtable_params prio_params = {
+	.head_offset = offsetof(struct prio, node),
+	.key_offset = offsetof(struct prio, key),
+	.key_len = sizeof_field(struct prio, key),
+	.automatic_shrinking = true,
+};
+
+bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains)
+{
+	return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+}
+
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{
+	return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+}
+
+bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains)
+{
+	return mlx5_chains_prios_supported(chains) &&
+	       mlx5_chains_ignore_flow_level_supported(chains);
+}
+
+u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains)
+{
+	if (!mlx5_chains_prios_supported(chains))
+		return 1;
+
+	if (mlx5_chains_ignore_flow_level_supported(chains))
+		return UINT_MAX - 1;
+
+	/* We should get here only for eswitch case */
+	return FDB_TC_MAX_CHAIN;
+}
+
+u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
+{
+	return mlx5_chains_get_chain_range(chains) + 1;
+}
+
+u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+{
+	if (mlx5_chains_ignore_flow_level_supported(chains))
+		return UINT_MAX;
+
+	if (!chains->dev->priv.eswitch ||
+	    chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
+		return 1;
+
+	/* We should get here only for eswitch case */
+	return FDB_TC_MAX_PRIO;
+}
+
+static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains)
+{
+	if (mlx5_chains_ignore_flow_level_supported(chains))
+		return UINT_MAX;
+
+	/* Same value for FDB and NIC RX tables */
+	return FDB_TC_LEVELS_PER_PRIO;
+}
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+		       struct mlx5_flow_table *ft)
+{
+	chains_end_ft(chains) = ft;
+}
+
+static struct mlx5_flow_table *
+mlx5_chains_create_table(struct mlx5_fs_chains *chains,
+			 u32 chain, u32 prio, u32 level)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *ft;
+	int sz;
+
+	if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED)
+		ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+				  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+	sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE;
+	ft_attr.max_fte = sz;
+
+	/* We use chains_default_ft(chains) as the table's next_ft till
+	 * ignore_flow_level is allowed on FT creation and not just for FTEs.
+	 * Instead caller should add an explicit miss rule if needed.
+	 */
+	ft_attr.next_ft = chains_default_ft(chains);
+
+	/* The root table(chain 0, prio 1, level 0) is required to be
+	 * connected to the previous fs_core managed prio.
+	 * We always create it, as a managed table, in order to align with
+	 * fs_core logic.
+	 */
+	if (!mlx5_chains_ignore_flow_level_supported(chains) ||
+	    (chain == 0 && prio == 1 && level == 0)) {
+		ft_attr.level = chains->fs_base_level;
+		ft_attr.prio = chains->fs_base_prio + prio - 1;
+		ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
+			mlx5_get_fdb_sub_ns(chains->dev, chain) :
+			mlx5_get_flow_namespace(chains->dev, chains->ns);
+	} else {
+		ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
+		ft_attr.prio = chains->fs_base_prio;
+		/* Firmware doesn't allow us to create another level 0 table,
+		 * so we create all unmanaged tables as level 1 (base + 1).
+		 *
+		 * To connect them, we use explicit miss rules with
+		 * ignore_flow_level. Caller is responsible to create
+		 * these rules (if needed).
+		 */
+		ft_attr.level = chains->fs_base_level + 1;
+		ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
+	}
+
+	ft_attr.autogroup.num_reserved_entries = 2;
+	ft_attr.autogroup.max_num_groups = chains->group_num;
+	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+	if (IS_ERR(ft)) {
+		mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
+			       (int)PTR_ERR(ft), chain, prio, level, sz);
+		return ft;
+	}
+
+	return ft;
+}
+
+static int
+create_chain_restore(struct fs_chain *chain)
+{
+	struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+	u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+	struct mlx5_fs_chains *chains = chain->chains;
+	enum mlx5e_tc_attr_to_reg mapped_obj_to_reg;
+	struct mlx5_modify_hdr *mod_hdr;
+	u32 index;
+	int err;
+
+	if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) ||
+	    !mlx5_chains_prios_supported(chains) ||
+	    !chains->chains_mapping)
+		return 0;
+
+	err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
+	if (err)
+		return err;
+	if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
+		/* we got the special default flow tag id, so we won't know
+		 * if we actually marked the packet with the restore rule
+		 * we create.
+		 *
+		 * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
+		 */
+		err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
+		mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG);
+		if (err)
+			return err;
+	}
+
+	chain->id = index;
+
+	if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) {
+		mapped_obj_to_reg = MAPPED_OBJ_TO_REG;
+		chain->restore_rule = esw_add_restore_rule(esw, chain->id);
+		if (IS_ERR(chain->restore_rule)) {
+			err = PTR_ERR(chain->restore_rule);
+			goto err_rule;
+		}
+	} else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) {
+		/* For NIC RX we don't need a restore rule
+		 * since we write the metadata to reg_b
+		 * that is passed to SW directly.
+		 */
+		mapped_obj_to_reg = NIC_MAPPED_OBJ_TO_REG;
+	} else {
+		err = -EINVAL;
+		goto err_rule;
+	}
+
+	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, modact, field,
+		 mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mfield);
+	MLX5_SET(set_action_in, modact, offset,
+		 mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].moffset);
+	MLX5_SET(set_action_in, modact, length,
+		 mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen == 32 ?
+		 0 : mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen);
+	MLX5_SET(set_action_in, modact, data, chain->id);
+	mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns,
+					   1, modact);
+	if (IS_ERR(mod_hdr)) {
+		err = PTR_ERR(mod_hdr);
+		goto err_mod_hdr;
+	}
+	chain->miss_modify_hdr = mod_hdr;
+
+	return 0;
+
+err_mod_hdr:
+	if (!IS_ERR_OR_NULL(chain->restore_rule))
+		mlx5_del_flow_rules(chain->restore_rule);
+err_rule:
+	/* Datapath can't find this mapping, so we can safely remove it */
+	mapping_remove(chains->chains_mapping, chain->id);
+	return err;
+}
+
+static void destroy_chain_restore(struct fs_chain *chain)
+{
+	struct mlx5_fs_chains *chains = chain->chains;
+
+	if (!chain->miss_modify_hdr)
+		return;
+
+	if (chain->restore_rule)
+		mlx5_del_flow_rules(chain->restore_rule);
+
+	mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr);
+	mapping_remove(chains->chains_mapping, chain->id);
+}
+
+static struct fs_chain *
+mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+	struct fs_chain *chain_s = NULL;
+	int err;
+
+	chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL);
+	if (!chain_s)
+		return ERR_PTR(-ENOMEM);
+
+	chain_s->chains = chains;
+	chain_s->chain = chain;
+	INIT_LIST_HEAD(&chain_s->prios_list);
+
+	err = create_chain_restore(chain_s);
+	if (err)
+		goto err_restore;
+
+	err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node,
+				     chain_params);
+	if (err)
+		goto err_insert;
+
+	return chain_s;
+
+err_insert:
+	destroy_chain_restore(chain_s);
+err_restore:
+	kvfree(chain_s);
+	return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_chain(struct fs_chain *chain)
+{
+	struct mlx5_fs_chains *chains = chain->chains;
+
+	rhashtable_remove_fast(&chains_ht(chains), &chain->node,
+			       chain_params);
+
+	destroy_chain_restore(chain);
+	kvfree(chain);
+}
+
+static struct fs_chain *
+mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+	struct fs_chain *chain_s;
+
+	chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain,
+					 chain_params);
+	if (!chain_s) {
+		chain_s = mlx5_chains_create_chain(chains, chain);
+		if (IS_ERR(chain_s))
+			return chain_s;
+	}
+
+	chain_s->ref++;
+
+	return chain_s;
+}
+
+static struct mlx5_flow_handle *
+mlx5_chains_add_miss_rule(struct fs_chain *chain,
+			  struct mlx5_flow_table *ft,
+			  struct mlx5_flow_table *next_ft)
+{
+	struct mlx5_fs_chains *chains = chain->chains;
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act act = {};
+
+	act.flags  = FLOW_ACT_NO_APPEND;
+	if (mlx5_chains_ignore_flow_level_supported(chain->chains))
+		act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+
+	act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	dest.type  = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = next_ft;
+
+	if (chains->chains_mapping && next_ft == chains_end_ft(chains) &&
+	    chain->chain != mlx5_chains_get_nf_ft_chain(chains) &&
+	    mlx5_chains_prios_supported(chains)) {
+		act.modify_hdr = chain->miss_modify_hdr;
+		act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+	}
+
+	return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
+}
+
+static int
+mlx5_chains_update_prio_prevs(struct prio *prio,
+			      struct mlx5_flow_table *next_ft)
+{
+	struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
+	struct fs_chain *chain = prio->chain;
+	struct prio *pos;
+	int n = 0, err;
+
+	if (prio->key.level)
+		return 0;
+
+	/* Iterate in reverse order until reaching the level 0 rule of
+	 * the previous priority, adding all the miss rules first, so we can
+	 * revert them if any of them fails.
+	 */
+	pos = prio;
+	list_for_each_entry_continue_reverse(pos,
+					     &chain->prios_list,
+					     list) {
+		miss_rules[n] = mlx5_chains_add_miss_rule(chain,
+							  pos->ft,
+							  next_ft);
+		if (IS_ERR(miss_rules[n])) {
+			err = PTR_ERR(miss_rules[n]);
+			goto err_prev_rule;
+		}
+
+		n++;
+		if (!pos->key.level)
+			break;
+	}
+
+	/* Success, delete old miss rules, and update the pointers. */
+	n = 0;
+	pos = prio;
+	list_for_each_entry_continue_reverse(pos,
+					     &chain->prios_list,
+					     list) {
+		mlx5_del_flow_rules(pos->miss_rule);
+
+		pos->miss_rule = miss_rules[n];
+		pos->next_ft = next_ft;
+
+		n++;
+		if (!pos->key.level)
+			break;
+	}
+
+	return 0;
+
+err_prev_rule:
+	while (--n >= 0)
+		mlx5_del_flow_rules(miss_rules[n]);
+
+	return err;
+}
+
+static void
+mlx5_chains_put_chain(struct fs_chain *chain)
+{
+	if (--chain->ref == 0)
+		mlx5_chains_destroy_chain(chain);
+}
+
+static struct prio *
+mlx5_chains_create_prio(struct mlx5_fs_chains *chains,
+			u32 chain, u32 prio, u32 level)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_handle *miss_rule;
+	struct mlx5_flow_group *miss_group;
+	struct mlx5_flow_table *next_ft;
+	struct mlx5_flow_table *ft;
+	struct fs_chain *chain_s;
+	struct list_head *pos;
+	struct prio *prio_s;
+	u32 *flow_group_in;
+	int err;
+
+	chain_s = mlx5_chains_get_chain(chains, chain);
+	if (IS_ERR(chain_s))
+		return ERR_CAST(chain_s);
+
+	prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL);
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!prio_s || !flow_group_in) {
+		err = -ENOMEM;
+		goto err_alloc;
+	}
+
+	/* Chain's prio list is sorted by prio and level.
+	 * And all levels of some prio point to the next prio's level 0.
+	 * Example list (prio, level):
+	 * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
+	 * In hardware, we will we have the following pointers:
+	 * (3,0) -> (5,0) -> (7,0) -> Slow path
+	 * (3,1) -> (5,0)
+	 * (5,1) -> (7,0)
+	 * (6,1) -> (7,0)
+	 */
+
+	/* Default miss for each chain: */
+	next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
+		  chains_default_ft(chains) :
+		  chains_end_ft(chains);
+	list_for_each(pos, &chain_s->prios_list) {
+		struct prio *p = list_entry(pos, struct prio, list);
+
+		/* exit on first pos that is larger */
+		if (prio < p->key.prio || (prio == p->key.prio &&
+					   level < p->key.level)) {
+			/* Get next level 0 table */
+			next_ft = p->key.level == 0 ? p->ft : p->next_ft;
+			break;
+		}
+	}
+
+	ft = mlx5_chains_create_table(chains, chain, prio, level);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		goto err_create;
+	}
+
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
+		 ft->max_fte - 2);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+		 ft->max_fte - 1);
+	miss_group = mlx5_create_flow_group(ft, flow_group_in);
+	if (IS_ERR(miss_group)) {
+		err = PTR_ERR(miss_group);
+		goto err_group;
+	}
+
+	/* Add miss rule to next_ft */
+	miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft);
+	if (IS_ERR(miss_rule)) {
+		err = PTR_ERR(miss_rule);
+		goto err_miss_rule;
+	}
+
+	prio_s->miss_group = miss_group;
+	prio_s->miss_rule = miss_rule;
+	prio_s->next_ft = next_ft;
+	prio_s->chain = chain_s;
+	prio_s->key.chain = chain;
+	prio_s->key.prio = prio;
+	prio_s->key.level = level;
+	prio_s->ft = ft;
+
+	err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node,
+				     prio_params);
+	if (err)
+		goto err_insert;
+
+	list_add(&prio_s->list, pos->prev);
+
+	/* Table is ready, connect it */
+	err = mlx5_chains_update_prio_prevs(prio_s, ft);
+	if (err)
+		goto err_update;
+
+	kvfree(flow_group_in);
+	return prio_s;
+
+err_update:
+	list_del(&prio_s->list);
+	rhashtable_remove_fast(&prios_ht(chains), &prio_s->node,
+			       prio_params);
+err_insert:
+	mlx5_del_flow_rules(miss_rule);
+err_miss_rule:
+	mlx5_destroy_flow_group(miss_group);
+err_group:
+	mlx5_destroy_flow_table(ft);
+err_create:
+err_alloc:
+	kvfree(prio_s);
+	kvfree(flow_group_in);
+	mlx5_chains_put_chain(chain_s);
+	return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains,
+			 struct prio *prio)
+{
+	struct fs_chain *chain = prio->chain;
+
+	WARN_ON(mlx5_chains_update_prio_prevs(prio,
+					      prio->next_ft));
+
+	list_del(&prio->list);
+	rhashtable_remove_fast(&prios_ht(chains), &prio->node,
+			       prio_params);
+	mlx5_del_flow_rules(prio->miss_rule);
+	mlx5_destroy_flow_group(prio->miss_group);
+	mlx5_destroy_flow_table(prio->ft);
+	mlx5_chains_put_chain(chain);
+	kvfree(prio);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+		      u32 level)
+{
+	struct mlx5_flow_table *prev_fts;
+	struct prio *prio_s;
+	struct prio_key key;
+	int l = 0;
+
+	if ((chain > mlx5_chains_get_chain_range(chains) &&
+	     chain != mlx5_chains_get_nf_ft_chain(chains)) ||
+	    prio > mlx5_chains_get_prio_range(chains) ||
+	    level > mlx5_chains_get_level_range(chains))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/* create earlier levels for correct fs_core lookup when
+	 * connecting tables.
+	 */
+	for (l = 0; l < level; l++) {
+		prev_fts = mlx5_chains_get_table(chains, chain, prio, l);
+		if (IS_ERR(prev_fts)) {
+			prio_s = ERR_CAST(prev_fts);
+			goto err_get_prevs;
+		}
+	}
+
+	key.chain = chain;
+	key.prio = prio;
+	key.level = level;
+
+	mutex_lock(&chains_lock(chains));
+	prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+					prio_params);
+	if (!prio_s) {
+		prio_s = mlx5_chains_create_prio(chains, chain,
+						 prio, level);
+		if (IS_ERR(prio_s))
+			goto err_create_prio;
+	}
+
+	++prio_s->ref;
+	mutex_unlock(&chains_lock(chains));
+
+	return prio_s->ft;
+
+err_create_prio:
+	mutex_unlock(&chains_lock(chains));
+err_get_prevs:
+	while (--l >= 0)
+		mlx5_chains_put_table(chains, chain, prio, l);
+	return ERR_CAST(prio_s);
+}
+
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+		      u32 level)
+{
+	struct prio *prio_s;
+	struct prio_key key;
+
+	key.chain = chain;
+	key.prio = prio;
+	key.level = level;
+
+	mutex_lock(&chains_lock(chains));
+	prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+					prio_params);
+	if (!prio_s)
+		goto err_get_prio;
+
+	if (--prio_s->ref == 0)
+		mlx5_chains_destroy_prio(chains, prio_s);
+	mutex_unlock(&chains_lock(chains));
+
+	while (level-- > 0)
+		mlx5_chains_put_table(chains, chain, prio, level);
+
+	return;
+
+err_get_prio:
+	mutex_unlock(&chains_lock(chains));
+	WARN_ONCE(1,
+		  "Couldn't find table: (chain: %d prio: %d level: %d)",
+		  chain, prio, level);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains)
+{
+	return chains_end_ft(chains);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains)
+{
+	u32 chain, prio, level;
+	int err;
+
+	if (!mlx5_chains_ignore_flow_level_supported(chains)) {
+		err = -EOPNOTSUPP;
+
+		mlx5_core_warn(chains->dev,
+			       "Couldn't create global flow table, ignore_flow_level not supported.");
+		goto err_ignore;
+	}
+
+	chain = mlx5_chains_get_chain_range(chains),
+	prio = mlx5_chains_get_prio_range(chains);
+	level = mlx5_chains_get_level_range(chains);
+
+	return mlx5_chains_create_table(chains, chain, prio, level);
+
+err_ignore:
+	return ERR_PTR(err);
+}
+
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+				 struct mlx5_flow_table *ft)
+{
+	mlx5_destroy_flow_table(ft);
+}
+
+static struct mlx5_fs_chains *
+mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+	struct mlx5_fs_chains *chains;
+	int err;
+
+	chains = kzalloc(sizeof(*chains), GFP_KERNEL);
+	if (!chains)
+		return ERR_PTR(-ENOMEM);
+
+	chains->dev = dev;
+	chains->flags = attr->flags;
+	chains->ns = attr->ns;
+	chains->group_num = attr->max_grp_num;
+	chains->chains_mapping = attr->mapping;
+	chains->fs_base_prio = attr->fs_base_prio;
+	chains->fs_base_level = attr->fs_base_level;
+	chains_default_ft(chains) = chains_end_ft(chains) = attr->default_ft;
+
+	err = rhashtable_init(&chains_ht(chains), &chain_params);
+	if (err)
+		goto init_chains_ht_err;
+
+	err = rhashtable_init(&prios_ht(chains), &prio_params);
+	if (err)
+		goto init_prios_ht_err;
+
+	mutex_init(&chains_lock(chains));
+
+	return chains;
+
+init_prios_ht_err:
+	rhashtable_destroy(&chains_ht(chains));
+init_chains_ht_err:
+	kfree(chains);
+	return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_cleanup(struct mlx5_fs_chains *chains)
+{
+	mutex_destroy(&chains_lock(chains));
+	rhashtable_destroy(&prios_ht(chains));
+	rhashtable_destroy(&chains_ht(chains));
+
+	kfree(chains);
+}
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+	struct mlx5_fs_chains *chains;
+
+	chains = mlx5_chains_init(dev, attr);
+
+	return chains;
+}
+
+void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains)
+{
+	mlx5_chains_cleanup(chains);
+}
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+			      u32 *chain_mapping)
+{
+	struct mapping_ctx *ctx = chains->chains_mapping;
+	struct mlx5_mapped_obj mapped_obj = {};
+
+	mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN;
+	mapped_obj.chain = chain;
+	return mapping_add(ctx, &mapped_obj, chain_mapping);
+}
+
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
+{
+	struct mapping_ctx *ctx = chains->chains_mapping;
+
+	return mapping_remove(ctx, chain_mapping);
+}
+
+void
+mlx5_chains_print_info(struct mlx5_fs_chains *chains)
+{
+	mlx5_core_dbg(chains->dev, "Flow table chains groups(%d)\n", chains->group_num);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
new file mode 100644
index 0000000000..8972fe0572
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_ESW_CHAINS_H__
+#define __ML5_ESW_CHAINS_H__
+
+#include <linux/mlx5/fs.h>
+
+struct mlx5_fs_chains;
+struct mlx5_mapped_obj;
+
+enum mlx5_chains_flags {
+	MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
+	MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1),
+	MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2),
+};
+
+struct mlx5_chains_attr {
+	enum mlx5_flow_namespace_type ns;
+	int fs_base_prio;
+	int fs_base_level;
+	u32 flags;
+	u32 max_grp_num;
+	struct mlx5_flow_table *default_ft;
+	struct mapping_ctx *mapping;
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+bool
+mlx5_chains_prios_supported(struct mlx5_fs_chains *chains);
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains);
+bool
+mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+		      u32 level);
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+		      u32 level);
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains);
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+				 struct mlx5_flow_table *ft);
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+			      u32 *chain_mapping);
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains,
+			      u32 chain_mapping);
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr);
+void mlx5_chains_destroy(struct mlx5_fs_chains *chains);
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+		       struct mlx5_flow_table *ft);
+void
+mlx5_chains_print_info(struct mlx5_fs_chains *chains);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline bool
+mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{ return false; }
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+		      u32 level) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+		      u32 level) {};
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); }
+
+static inline struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{ return NULL; }
+static inline void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains) {}
+static inline void
+mlx5_chains_print_info(struct mlx5_fs_chains *chains) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __ML5_ESW_CHAINS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
new file mode 100644
index 0000000000..b78f2ba25c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS	3
+#define MLX5_TTC_GROUP1_SIZE	(BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE	 BIT(1)
+#define MLX5_TTC_GROUP3_SIZE	 BIT(0)
+#define MLX5_TTC_TABLE_SIZE	(MLX5_TTC_GROUP1_SIZE +\
+				 MLX5_TTC_GROUP2_SIZE +\
+				 MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS	3
+#define MLX5_INNER_TTC_GROUP1_SIZE	BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE	BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE	BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE	(MLX5_INNER_TTC_GROUP1_SIZE +\
+					 MLX5_INNER_TTC_GROUP2_SIZE +\
+					 MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+	int num_groups;
+	struct mlx5_flow_table *t;
+	struct mlx5_flow_group **g;
+	struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+	struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+	return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+	int i;
+
+	for (i = 0; i < MLX5_NUM_TT; i++) {
+		if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+			mlx5_del_flow_rules(ttc->rules[i].rule);
+			ttc->rules[i].rule = NULL;
+		}
+	}
+
+	for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+		if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+			mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+			ttc->tunnel_rules[i] = NULL;
+		}
+	}
+}
+
+struct mlx5_etype_proto {
+	u16 etype;
+	u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+	[MLX5_TT_IPV4_TCP] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_TCP,
+	},
+	[MLX5_TT_IPV6_TCP] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_TCP,
+	},
+	[MLX5_TT_IPV4_UDP] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_UDP,
+	},
+	[MLX5_TT_IPV6_UDP] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_UDP,
+	},
+	[MLX5_TT_IPV4_IPSEC_AH] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_AH,
+	},
+	[MLX5_TT_IPV6_IPSEC_AH] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_AH,
+	},
+	[MLX5_TT_IPV4_IPSEC_ESP] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_ESP,
+	},
+	[MLX5_TT_IPV6_IPSEC_ESP] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_ESP,
+	},
+	[MLX5_TT_IPV4] = {
+		.etype = ETH_P_IP,
+		.proto = 0,
+	},
+	[MLX5_TT_IPV6] = {
+		.etype = ETH_P_IPV6,
+		.proto = 0,
+	},
+	[MLX5_TT_ANY] = {
+		.etype = 0,
+		.proto = 0,
+	},
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+	[MLX5_TT_IPV4_GRE] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_GRE,
+	},
+	[MLX5_TT_IPV6_GRE] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_GRE,
+	},
+	[MLX5_TT_IPV4_IPIP] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_IPIP,
+	},
+	[MLX5_TT_IPV6_IPIP] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_IPIP,
+	},
+	[MLX5_TT_IPV4_IPV6] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_IPV6,
+	},
+	[MLX5_TT_IPV6_IPV6] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_IPV6,
+	},
+
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+	return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+					   u8 proto_type)
+{
+	switch (proto_type) {
+	case IPPROTO_GRE:
+		return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+	case IPPROTO_IPIP:
+	case IPPROTO_IPV6:
+		return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+			MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+	default:
+		return false;
+	}
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+	int tt;
+
+	for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+		if (mlx5_tunnel_proto_supported_rx(mdev,
+						   ttc_tunnel_rules[tt].proto))
+			return true;
+	}
+	return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+	return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+					  ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+	if (ethertype == ETH_P_IP)
+		return 4;
+
+	if (ethertype == ETH_P_IPV6)
+		return 6;
+
+	return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+		       struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+	int match_ipv_outer =
+		MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+					  ft_field_support.outer_ip_version);
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+	u8 ipv;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return ERR_PTR(-ENOMEM);
+
+	if (proto) {
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+	}
+
+	ipv = mlx5_etype_to_ipv(etype);
+	if (match_ipv_outer && ipv) {
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+	} else if (etype) {
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+	}
+
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+	}
+
+	kvfree(spec);
+	return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+					 struct ttc_params *params,
+					 struct mlx5_ttc_table *ttc)
+{
+	struct mlx5_flow_handle **trules;
+	struct mlx5_ttc_rule *rules;
+	struct mlx5_flow_table *ft;
+	int tt;
+	int err;
+
+	ft = ttc->t;
+	rules = ttc->rules;
+	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+		struct mlx5_ttc_rule *rule = &rules[tt];
+
+		if (test_bit(tt, params->ignore_dests))
+			continue;
+		rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt],
+						    ttc_rules[tt].etype,
+						    ttc_rules[tt].proto);
+		if (IS_ERR(rule->rule)) {
+			err = PTR_ERR(rule->rule);
+			rule->rule = NULL;
+			goto del_rules;
+		}
+		rule->default_dest = params->dests[tt];
+	}
+
+	if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+		return 0;
+
+	trules    = ttc->tunnel_rules;
+	for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+		if (!mlx5_tunnel_proto_supported_rx(dev,
+						    ttc_tunnel_rules[tt].proto))
+			continue;
+		if (test_bit(tt, params->ignore_tunnel_dests))
+			continue;
+		trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+						    &params->tunnel_dests[tt],
+						    ttc_tunnel_rules[tt].etype,
+						    ttc_tunnel_rules[tt].proto);
+		if (IS_ERR(trules[tt])) {
+			err = PTR_ERR(trules[tt]);
+			trules[tt] = NULL;
+			goto del_rules;
+		}
+	}
+
+	return 0;
+
+del_rules:
+	mlx5_cleanup_ttc_rules(ttc);
+	return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+					bool use_ipv)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+	if (!ttc->g)
+		return -ENOMEM;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		kfree(ttc->g);
+		ttc->g = NULL;
+		return -ENOMEM;
+	}
+
+	/* L4 Group */
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+	if (use_ipv)
+		MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+	else
+		MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_TTC_GROUP1_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+	if (IS_ERR(ttc->g[ttc->num_groups]))
+		goto err;
+	ttc->num_groups++;
+
+	/* L3 Group */
+	MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_TTC_GROUP2_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+	if (IS_ERR(ttc->g[ttc->num_groups]))
+		goto err;
+	ttc->num_groups++;
+
+	/* Any Group */
+	memset(in, 0, inlen);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_TTC_GROUP3_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+	if (IS_ERR(ttc->g[ttc->num_groups]))
+		goto err;
+	ttc->num_groups++;
+
+	kvfree(in);
+	return 0;
+
+err:
+	err = PTR_ERR(ttc->g[ttc->num_groups]);
+	ttc->g[ttc->num_groups] = NULL;
+	kvfree(in);
+
+	return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+			     struct mlx5_flow_table *ft,
+			     struct mlx5_flow_destination *dest,
+			     u16 etype, u8 proto)
+{
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+	u8 ipv;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return ERR_PTR(-ENOMEM);
+
+	ipv = mlx5_etype_to_ipv(etype);
+	if (etype && ipv) {
+		spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+		MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+	}
+
+	if (proto) {
+		spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+		MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+	}
+
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+	}
+
+	kvfree(spec);
+	return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+					       struct ttc_params *params,
+					       struct mlx5_ttc_table *ttc)
+{
+	struct mlx5_ttc_rule *rules;
+	struct mlx5_flow_table *ft;
+	int err;
+	int tt;
+
+	ft = ttc->t;
+	rules = ttc->rules;
+
+	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+		struct mlx5_ttc_rule *rule = &rules[tt];
+
+		if (test_bit(tt, params->ignore_dests))
+			continue;
+		rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+							  &params->dests[tt],
+							  ttc_rules[tt].etype,
+							  ttc_rules[tt].proto);
+		if (IS_ERR(rule->rule)) {
+			err = PTR_ERR(rule->rule);
+			rule->rule = NULL;
+			goto del_rules;
+		}
+		rule->default_dest = params->dests[tt];
+	}
+
+	return 0;
+
+del_rules:
+
+	mlx5_cleanup_ttc_rules(ttc);
+	return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+			 GFP_KERNEL);
+	if (!ttc->g)
+		return -ENOMEM;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		kfree(ttc->g);
+		ttc->g = NULL;
+		return -ENOMEM;
+	}
+
+	/* L4 Group */
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+	MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_INNER_TTC_GROUP1_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+	if (IS_ERR(ttc->g[ttc->num_groups]))
+		goto err;
+	ttc->num_groups++;
+
+	/* L3 Group */
+	MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_INNER_TTC_GROUP2_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+	if (IS_ERR(ttc->g[ttc->num_groups]))
+		goto err;
+	ttc->num_groups++;
+
+	/* Any Group */
+	memset(in, 0, inlen);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_INNER_TTC_GROUP3_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+	if (IS_ERR(ttc->g[ttc->num_groups]))
+		goto err;
+	ttc->num_groups++;
+
+	kvfree(in);
+	return 0;
+
+err:
+	err = PTR_ERR(ttc->g[ttc->num_groups]);
+	ttc->g[ttc->num_groups] = NULL;
+	kvfree(in);
+
+	return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+						   struct ttc_params *params)
+{
+	struct mlx5_ttc_table *ttc;
+	int err;
+
+	ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+	if (!ttc)
+		return ERR_PTR(-ENOMEM);
+
+	WARN_ON_ONCE(params->ft_attr.max_fte);
+	params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+	ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+	if (IS_ERR(ttc->t)) {
+		err = PTR_ERR(ttc->t);
+		kvfree(ttc);
+		return ERR_PTR(err);
+	}
+
+	err = mlx5_create_inner_ttc_table_groups(ttc);
+	if (err)
+		goto destroy_ft;
+
+	err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+	if (err)
+		goto destroy_ft;
+
+	return ttc;
+
+destroy_ft:
+	mlx5_destroy_ttc_table(ttc);
+	return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+	int i;
+
+	mlx5_cleanup_ttc_rules(ttc);
+	for (i = ttc->num_groups - 1; i >= 0; i--) {
+		if (!IS_ERR_OR_NULL(ttc->g[i]))
+			mlx5_destroy_flow_group(ttc->g[i]);
+		ttc->g[i] = NULL;
+	}
+
+	kfree(ttc->g);
+	mlx5_destroy_flow_table(ttc->t);
+	kvfree(ttc);
+}
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+					     struct ttc_params *params)
+{
+	bool match_ipv_outer =
+		MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+					  ft_field_support.outer_ip_version);
+	struct mlx5_ttc_table *ttc;
+	int err;
+
+	ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+	if (!ttc)
+		return ERR_PTR(-ENOMEM);
+
+	WARN_ON_ONCE(params->ft_attr.max_fte);
+	params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+	ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+	if (IS_ERR(ttc->t)) {
+		err = PTR_ERR(ttc->t);
+		kvfree(ttc);
+		return ERR_PTR(err);
+	}
+
+	err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+	if (err)
+		goto destroy_ft;
+
+	err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+	if (err)
+		goto destroy_ft;
+
+	return ttc;
+
+destroy_ft:
+	mlx5_destroy_ttc_table(ttc);
+	return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+		      struct mlx5_flow_destination *new_dest)
+{
+	return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+					    NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+			  enum mlx5_traffic_types type)
+{
+	struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+	WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+		  "TTC[%d] default dest is not setup yet", type);
+
+	return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+			      enum mlx5_traffic_types type)
+{
+	struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+	return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
new file mode 100644
index 0000000000..85fef0cd1c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_FS_TTC_H__
+#define __ML5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+	MLX5_TT_IPV4_TCP,
+	MLX5_TT_IPV6_TCP,
+	MLX5_TT_IPV4_UDP,
+	MLX5_TT_IPV6_UDP,
+	MLX5_TT_IPV4_IPSEC_AH,
+	MLX5_TT_IPV6_IPSEC_AH,
+	MLX5_TT_IPV4_IPSEC_ESP,
+	MLX5_TT_IPV6_IPSEC_ESP,
+	MLX5_TT_IPV4,
+	MLX5_TT_IPV6,
+	MLX5_TT_ANY,
+	MLX5_NUM_TT,
+	MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+	MLX5_TT_IPV4_GRE,
+	MLX5_TT_IPV6_GRE,
+	MLX5_TT_IPV4_IPIP,
+	MLX5_TT_IPV6_IPIP,
+	MLX5_TT_IPV4_IPV6,
+	MLX5_TT_IPV6_IPV6,
+	MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table_attr ft_attr;
+	struct mlx5_flow_destination dests[MLX5_NUM_TT];
+	DECLARE_BITMAP(ignore_dests, MLX5_NUM_TT);
+	bool   inner_ttc;
+	DECLARE_BITMAP(ignore_tunnel_dests, MLX5_NUM_TUNNEL_TT);
+	struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+					     struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+						   struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+		      struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+			  enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+			      enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
new file mode 100644
index 0000000000..6dc83e871c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/kernel.h>
+#include "mlx5_core.h"
+#include "geneve.h"
+
+struct mlx5_geneve {
+	struct mlx5_core_dev *mdev;
+	__be16 opt_class;
+	u8 opt_type;
+	u32 obj_id;
+	struct mutex sync_lock; /* protect GENEVE obj operations */
+	u32 refcount;
+};
+
+static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev,
+					 __be16 class,
+					 u8 type,
+					 u8 len)
+{
+	u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u64 general_obj_types;
+	void *hdr, *opt;
+	u16 obj_id;
+	int err;
+
+	general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+	if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT))
+		return -EINVAL;
+
+	hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr);
+	opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt);
+
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+
+	MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class));
+	MLX5_SET(geneve_tlv_option, opt, option_type, type);
+	MLX5_SET(geneve_tlv_option, opt, option_data_length, len);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return obj_id;
+}
+
+static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id)
+{
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt)
+{
+	int res = 0;
+
+	if (IS_ERR_OR_NULL(geneve))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&geneve->sync_lock);
+
+	if (geneve->refcount) {
+		if (geneve->opt_class == opt->opt_class &&
+		    geneve->opt_type == opt->type) {
+			/* We already have TLV options obj allocated */
+			geneve->refcount++;
+		} else {
+			/* TLV options obj allocated, but its params
+			 * do not match the new request.
+			 * We support only one such object.
+			 */
+			mlx5_core_warn(geneve->mdev,
+				       "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n",
+				       be16_to_cpu(opt->opt_class),
+				       opt->type,
+				       opt->length);
+			res = -EOPNOTSUPP;
+			goto unlock;
+		}
+	} else {
+		/* We don't have any TLV options obj allocated */
+
+		res = mlx5_geneve_tlv_option_create(geneve->mdev,
+						    opt->opt_class,
+						    opt->type,
+						    opt->length);
+		if (res < 0) {
+			mlx5_core_warn(geneve->mdev,
+				       "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n",
+				       be16_to_cpu(opt->opt_class),
+				       opt->type, opt->length, res);
+			goto unlock;
+		}
+		geneve->opt_class = opt->opt_class;
+		geneve->opt_type = opt->type;
+		geneve->obj_id = res;
+		geneve->refcount++;
+		res = 0;
+	}
+
+unlock:
+	mutex_unlock(&geneve->sync_lock);
+	return res;
+}
+
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve)
+{
+	if (IS_ERR_OR_NULL(geneve))
+		return;
+
+	mutex_lock(&geneve->sync_lock);
+	if (--geneve->refcount == 0) {
+		/* We've just removed the last user of Geneve option.
+		 * Now delete the object in FW.
+		 */
+		mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+		geneve->opt_class = 0;
+		geneve->opt_type = 0;
+		geneve->obj_id = 0;
+	}
+	mutex_unlock(&geneve->sync_lock);
+}
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_geneve *geneve =
+		kzalloc(sizeof(*geneve), GFP_KERNEL);
+
+	if (!geneve)
+		return ERR_PTR(-ENOMEM);
+	geneve->mdev = mdev;
+	mutex_init(&geneve->sync_lock);
+
+	return geneve;
+}
+
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve)
+{
+	if (IS_ERR_OR_NULL(geneve))
+		return;
+
+	/* Lockless since we are unloading */
+	if (geneve->refcount)
+		mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+	kfree(geneve);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
new file mode 100644
index 0000000000..adee0cbba1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_GENEVE_H__
+#define __MLX5_GENEVE_H__
+
+#include <net/geneve.h>
+#include <linux/mlx5/driver.h>
+
+struct mlx5_geneve;
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev);
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve);
+
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt);
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline struct mlx5_geneve
+*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; }
+static inline void
+mlx5_geneve_destroy(struct mlx5_geneve *geneve) {}
+static inline int
+mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; }
+static inline void
+mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_GENEVE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
new file mode 100644
index 0000000000..96ffc0a0e6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+	unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+	ida_init(&dev->roce.reserved_gids.ida);
+	dev->roce.reserved_gids.start = tblsz;
+	dev->roce.reserved_gids.count = 0;
+}
+
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+	WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida));
+	dev->roce.reserved_gids.start = 0;
+	dev->roce.reserved_gids.count = 0;
+	ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	if (dev->roce.reserved_gids.start < count) {
+		mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
+			       count);
+		return -ENOMEM;
+	}
+	if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+		mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count);
+		return -ENOMEM;
+	}
+
+	dev->roce.reserved_gids.start -= count;
+	dev->roce.reserved_gids.count += count;
+	mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+	return 0;
+}
+
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+	     count, dev->roce.reserved_gids.count);
+
+	dev->roce.reserved_gids.start += count;
+	dev->roce.reserved_gids.count -= count;
+	mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+}
+
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+	int end = dev->roce.reserved_gids.start +
+		  dev->roce.reserved_gids.count - 1;
+	int index = 0;
+
+	index = ida_alloc_range(&dev->roce.reserved_gids.ida,
+				dev->roce.reserved_gids.start, end,
+				GFP_KERNEL);
+	if (index < 0)
+		return index;
+
+	mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index);
+	*gid_index = index;
+	return 0;
+}
+
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+	mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+	ida_free(&dev->roce.reserved_gids.ida, gid_index);
+}
+
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+	return dev->roce.reserved_gids.count;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+			   u8 roce_version, u8 roce_l3_type, const u8 *gid,
+			   const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+	u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {};
+	void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+	char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+					  source_l3_address);
+	void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+				      source_mac_47_32);
+	int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return -EINVAL;
+
+	if (gid) {
+		if (vlan) {
+			MLX5_SET_RA(in_addr, vlan_valid, 1);
+			MLX5_SET_RA(in_addr, vlan_id, vlan_id);
+		}
+
+		ether_addr_copy(addr_mac, mac);
+		memcpy(addr_l3_addr, gid, gidsz);
+	}
+	MLX5_SET_RA(in_addr, roce_version, roce_version);
+	MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
+
+	if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0)
+		MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num);
+
+	MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+	MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+	return mlx5_cmd_exec_in(dev, set_roce_address, in);
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000000..583dc7e2ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+				 int offset, bool read)
+{
+	int rc = -EOPNOTSUPP;
+	int bytes_returned;
+	int block_id;
+
+	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len != HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+	rc = read ?
+	     hyperv_read_cfg_blk(dev->pdev, buf,
+				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+				 &bytes_returned) :
+	     hyperv_write_cfg_blk(dev->pdev, buf,
+				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+	/* Make sure len bytes were read successfully  */
+	if (read && !rc && len != bytes_returned)
+		rc = -EIO;
+
+	if (rc) {
+		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+			      read ? "read" : "write", rc, len,
+			      offset);
+		return rc;
+	}
+
+	return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	return hyperv_reg_block_invalidate(dev->pdev, context,
+					   block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+	hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000000..f9a45573f4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 0000000000..30564d9b00
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+	struct mlx5_core_dev       *dev;
+	struct workqueue_struct    *work_queue;
+	struct mlx5_hv_vhca_agent  *agents[MLX5_HV_VHCA_AGENT_MAX];
+	struct mutex                agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+	struct work_struct     invalidate_work;
+	struct mlx5_hv_vhca   *hv_vhca;
+	u64                    block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+	u16     sequence;
+	u16     offset;
+	u8      reserved[4];
+	u64     data[15];
+};
+
+struct mlx5_hv_vhca_agent {
+	enum mlx5_hv_vhca_agent_type	 type;
+	struct mlx5_hv_vhca		*hv_vhca;
+	void				*priv;
+	u16                              seq;
+	void (*control)(struct mlx5_hv_vhca_agent *agent,
+			struct mlx5_hv_vhca_control_block *block);
+	void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+			   u64 block_mask);
+	void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_hv_vhca *hv_vhca;
+
+	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+	if (!hv_vhca)
+		return ERR_PTR(-ENOMEM);
+
+	hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+	if (!hv_vhca->work_queue) {
+		kfree(hv_vhca);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	hv_vhca->dev = dev;
+	mutex_init(&hv_vhca->agents_lock);
+
+	return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	destroy_workqueue(hv_vhca->work_queue);
+	kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+	struct mlx5_hv_vhca_work *hwork;
+	struct mlx5_hv_vhca *hv_vhca;
+	int i;
+
+	hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+	hv_vhca = hwork->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (!agent || !agent->invalidate)
+			continue;
+
+		if (!(BIT(agent->type) & hwork->block_mask))
+			continue;
+
+		agent->invalidate(agent, hwork->block_mask);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+	struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+	struct mlx5_hv_vhca_work *work;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+	work->hv_vhca    = hv_vhca;
+	work->block_mask = block_mask;
+
+	queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
+#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
+
+static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
+					struct mlx5_hv_vhca_control_block *block)
+{
+	int i;
+
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (!agent || !agent->control)
+			continue;
+
+		if (!(AGENT_MASK(agent->type) & block->control))
+			continue;
+
+		agent->control(agent, block);
+	}
+}
+
+static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
+				      u32 *capabilities)
+{
+	int i;
+
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (agent)
+			*capabilities |= AGENT_MASK(agent->type);
+	}
+}
+
+static void
+mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
+				      u64 block_mask)
+{
+	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+	struct mlx5_core_dev *dev = hv_vhca->dev;
+	struct mlx5_hv_vhca_control_block *block;
+	u32 capabilities = 0;
+	int err;
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (!block)
+		return;
+
+	err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
+	if (err)
+		goto free_block;
+
+	mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
+
+	/* In case no capabilities, send empty block in return */
+	if (!capabilities) {
+		memset(block, 0, sizeof(*block));
+		goto write;
+	}
+
+	if (block->capabilities != capabilities)
+		block->capabilities = capabilities;
+
+	if (block->control & ~capabilities)
+		goto free_block;
+
+	mlx5_hv_vhca_agents_control(hv_vhca, block);
+	block->command_ack = block->command;
+
+write:
+	mlx5_hv_write_config(dev, block, sizeof(*block), 0);
+
+free_block:
+	kfree(block);
+}
+
+static struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
+{
+	return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
+					 NULL,
+					 mlx5_hv_vhca_control_agent_invalidate,
+					 NULL, NULL);
+}
+
+static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+	mlx5_hv_vhca_agent_destroy(agent);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	struct mlx5_hv_vhca_agent *agent;
+	int err;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return IS_ERR_OR_NULL(hv_vhca);
+
+	err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+					  mlx5_hv_vhca_invalidate);
+	if (err)
+		return err;
+
+	agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
+	if (IS_ERR_OR_NULL(agent)) {
+		mlx5_hv_unregister_invalidate(hv_vhca->dev);
+		return IS_ERR_OR_NULL(agent);
+	}
+
+	hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
+
+	return 0;
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+	struct mlx5_hv_vhca_agent *agent;
+	int i;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
+	if (agent)
+		mlx5_hv_vhca_control_agent_destroy(agent);
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+		WARN_ON(hv_vhca->agents[i]);
+
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
+{
+	mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
+			  void *priv)
+{
+	struct mlx5_hv_vhca_agent *agent;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return ERR_PTR(-ENOMEM);
+
+	if (type >= MLX5_HV_VHCA_AGENT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&hv_vhca->agents_lock);
+	if (hv_vhca->agents[type]) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return ERR_PTR(-EINVAL);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+	if (!agent)
+		return ERR_PTR(-ENOMEM);
+
+	agent->type      = type;
+	agent->hv_vhca   = hv_vhca;
+	agent->priv      = priv;
+	agent->control   = control;
+	agent->invalidate = invalidate;
+	agent->cleanup   = cleaup;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	hv_vhca->agents[type] = agent;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	mlx5_hv_vhca_agents_update(hv_vhca);
+
+	return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+
+	if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return;
+	}
+
+	hv_vhca->agents[agent->type] = NULL;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	if (agent->cleanup)
+		agent->cleanup(agent);
+
+	kfree(agent);
+
+	mlx5_hv_vhca_agents_update(hv_vhca);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+					   struct mlx5_hv_vhca_data_block *data_block,
+					   void *src, int len, int *offset)
+{
+	int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+	data_block->sequence = agent->seq;
+	data_block->offset   = (*offset)++;
+	memcpy(data_block->data, src, bytes);
+
+	return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+	agent->seq++;
+}
+
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len)
+{
+	int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+	int block_offset = 0;
+	int total = 0;
+	int err;
+
+	while (len) {
+		struct mlx5_hv_vhca_data_block data_block = {0};
+		int bytes;
+
+		bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+							buf + total,
+							len, &block_offset);
+		if (!bytes)
+			return -ENOMEM;
+
+		err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+					   sizeof(data_block), offset);
+		if (err)
+			return err;
+
+		total += bytes;
+		len   -= bytes;
+	}
+
+	mlx5_hv_vhca_agent_seq_update(agent);
+
+	return 0;
+}
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+	return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 0000000000..f240ffe511
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+	MLX5_HV_VHCA_AGENT_CONTROL = 0,
+	MLX5_HV_VHCA_AGENT_STATS   = 1,
+	MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+struct mlx5_hv_vhca_control_block {
+	u32     capabilities;
+	u32     control;
+	u16     command;
+	u16     command_ack;
+	u16     version;
+	u16     rings;
+	u32     reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+					   u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c
new file mode 100644
index 0000000000..6e3f178d6f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "fs_core.h"
+#include "lib/ipsec_fs_roce.h"
+#include "mlx5_core.h"
+
+struct mlx5_ipsec_miss {
+	struct mlx5_flow_group *group;
+	struct mlx5_flow_handle *rule;
+};
+
+struct mlx5_ipsec_rx_roce {
+	struct mlx5_flow_group *g;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_ipsec_miss roce_miss;
+
+	struct mlx5_flow_table *ft_rdma;
+	struct mlx5_flow_namespace *ns_rdma;
+};
+
+struct mlx5_ipsec_tx_roce {
+	struct mlx5_flow_group *g;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_namespace *ns;
+};
+
+struct mlx5_ipsec_fs {
+	struct mlx5_ipsec_rx_roce ipv4_rx;
+	struct mlx5_ipsec_rx_roce ipv6_rx;
+	struct mlx5_ipsec_tx_roce tx;
+};
+
+static void ipsec_fs_roce_setup_udp_dport(struct mlx5_flow_spec *spec,
+					  u16 dport)
+{
+	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, dport);
+}
+
+static int
+ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev,
+			    struct mlx5_flow_destination *default_dst,
+			    struct mlx5_ipsec_rx_roce *roce)
+{
+	struct mlx5_flow_destination dst = {};
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	ipsec_fs_roce_setup_udp_dport(spec, ROCE_V2_UDP_DPORT);
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
+	dst.ft = roce->ft_rdma;
+	rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Fail to add RX RoCE IPsec rule err=%d\n",
+			      err);
+		goto fail_add_rule;
+	}
+
+	roce->rule = rule;
+
+	memset(spec, 0, sizeof(*spec));
+	rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, default_dst, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Fail to add RX RoCE IPsec miss rule err=%d\n",
+			      err);
+		goto fail_add_default_rule;
+	}
+
+	roce->roce_miss.rule = rule;
+
+	kvfree(spec);
+	return 0;
+
+fail_add_default_rule:
+	mlx5_del_flow_rules(roce->rule);
+fail_add_rule:
+	kvfree(spec);
+	return err;
+}
+
+static int ipsec_fs_roce_tx_rule_setup(struct mlx5_core_dev *mdev,
+				       struct mlx5_ipsec_tx_roce *roce,
+				       struct mlx5_flow_table *pol_ft)
+{
+	struct mlx5_flow_destination dst = {};
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	int err = 0;
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
+	dst.ft = pol_ft;
+	rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, &dst,
+				   1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Fail to add TX RoCE IPsec rule err=%d\n",
+			      err);
+		goto out;
+	}
+	roce->rule = rule;
+
+out:
+	return err;
+}
+
+void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce)
+{
+	struct mlx5_ipsec_tx_roce *tx_roce;
+
+	if (!ipsec_roce)
+		return;
+
+	tx_roce = &ipsec_roce->tx;
+
+	mlx5_del_flow_rules(tx_roce->rule);
+	mlx5_destroy_flow_group(tx_roce->g);
+	mlx5_destroy_flow_table(tx_roce->ft);
+}
+
+#define MLX5_TX_ROCE_GROUP_SIZE BIT(0)
+
+int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev,
+				 struct mlx5_ipsec_fs *ipsec_roce,
+				 struct mlx5_flow_table *pol_ft)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_ipsec_tx_roce *roce;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_group *g;
+	int ix = 0;
+	int err;
+	u32 *in;
+
+	if (!ipsec_roce)
+		return 0;
+
+	roce = &ipsec_roce->tx;
+
+	in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	ft_attr.max_fte = 1;
+	ft = mlx5_create_flow_table(roce->ns, &ft_attr);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec tx ft err=%d\n", err);
+		goto free_in;
+	}
+
+	roce->ft = ft;
+
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_TX_ROCE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	g = mlx5_create_flow_group(ft, in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec tx group err=%d\n", err);
+		goto destroy_table;
+	}
+	roce->g = g;
+
+	err = ipsec_fs_roce_tx_rule_setup(mdev, roce, pol_ft);
+	if (err) {
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec tx rules err=%d\n", err);
+		goto destroy_group;
+	}
+
+	kvfree(in);
+	return 0;
+
+destroy_group:
+	mlx5_destroy_flow_group(roce->g);
+destroy_table:
+	mlx5_destroy_flow_table(ft);
+free_in:
+	kvfree(in);
+	return err;
+}
+
+struct mlx5_flow_table *mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family)
+{
+	struct mlx5_ipsec_rx_roce *rx_roce;
+
+	if (!ipsec_roce)
+		return NULL;
+
+	rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx :
+					&ipsec_roce->ipv6_rx;
+
+	return rx_roce->ft;
+}
+
+void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, u32 family)
+{
+	struct mlx5_ipsec_rx_roce *rx_roce;
+
+	if (!ipsec_roce)
+		return;
+
+	rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx :
+					&ipsec_roce->ipv6_rx;
+
+	mlx5_del_flow_rules(rx_roce->roce_miss.rule);
+	mlx5_del_flow_rules(rx_roce->rule);
+	mlx5_destroy_flow_table(rx_roce->ft_rdma);
+	mlx5_destroy_flow_group(rx_roce->roce_miss.group);
+	mlx5_destroy_flow_group(rx_roce->g);
+	mlx5_destroy_flow_table(rx_roce->ft);
+}
+
+#define MLX5_RX_ROCE_GROUP_SIZE BIT(0)
+
+int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev,
+				 struct mlx5_ipsec_fs *ipsec_roce,
+				 struct mlx5_flow_namespace *ns,
+				 struct mlx5_flow_destination *default_dst,
+				 u32 family, u32 level, u32 prio)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_ipsec_rx_roce *roce;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_group *g;
+	void *outer_headers_c;
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	if (!ipsec_roce)
+		return 0;
+
+	roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx :
+				     &ipsec_roce->ipv6_rx;
+
+	ft_attr.max_fte = 2;
+	ft_attr.level = level;
+	ft_attr.prio = prio;
+	ft = mlx5_create_flow_table(ns, &ft_attr);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at nic err=%d\n", err);
+		return err;
+	}
+
+	roce->ft = ft;
+
+	in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto fail_nomem;
+	}
+
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_RX_ROCE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	g = mlx5_create_flow_group(ft, in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec rx group at nic err=%d\n", err);
+		goto fail_group;
+	}
+	roce->g = g;
+
+	memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in));
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_RX_ROCE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	g = mlx5_create_flow_group(ft, in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec rx miss group at nic err=%d\n", err);
+		goto fail_mgroup;
+	}
+	roce->roce_miss.group = g;
+
+	memset(&ft_attr, 0, sizeof(ft_attr));
+	if (family == AF_INET)
+		ft_attr.level = 1;
+	ft = mlx5_create_flow_table(roce->ns_rdma, &ft_attr);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at rdma err=%d\n", err);
+		goto fail_rdma_table;
+	}
+
+	roce->ft_rdma = ft;
+
+	err = ipsec_fs_roce_rx_rule_setup(mdev, default_dst, roce);
+	if (err) {
+		mlx5_core_err(mdev, "Fail to create RoCE IPsec rx rules err=%d\n", err);
+		goto fail_setup_rule;
+	}
+
+	kvfree(in);
+	return 0;
+
+fail_setup_rule:
+	mlx5_destroy_flow_table(roce->ft_rdma);
+fail_rdma_table:
+	mlx5_destroy_flow_group(roce->roce_miss.group);
+fail_mgroup:
+	mlx5_destroy_flow_group(roce->g);
+fail_group:
+	kvfree(in);
+fail_nomem:
+	mlx5_destroy_flow_table(roce->ft);
+	return err;
+}
+
+void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce)
+{
+	kfree(ipsec_roce);
+}
+
+struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_ipsec_fs *roce_ipsec;
+	struct mlx5_flow_namespace *ns;
+
+	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC);
+	if (!ns) {
+		mlx5_core_err(mdev, "Failed to get RoCE rx ns\n");
+		return NULL;
+	}
+
+	roce_ipsec = kzalloc(sizeof(*roce_ipsec), GFP_KERNEL);
+	if (!roce_ipsec)
+		return NULL;
+
+	roce_ipsec->ipv4_rx.ns_rdma = ns;
+	roce_ipsec->ipv6_rx.ns_rdma = ns;
+
+	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC);
+	if (!ns) {
+		mlx5_core_err(mdev, "Failed to get RoCE tx ns\n");
+		goto err_tx;
+	}
+
+	roce_ipsec->tx.ns = ns;
+
+	return roce_ipsec;
+
+err_tx:
+	kfree(roce_ipsec);
+	return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h
new file mode 100644
index 0000000000..9712d705fe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_LIB_IPSEC_H__
+#define __MLX5_LIB_IPSEC_H__
+
+struct mlx5_ipsec_fs;
+
+struct mlx5_flow_table *
+mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family);
+void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce,
+				   u32 family);
+int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev,
+				 struct mlx5_ipsec_fs *ipsec_roce,
+				 struct mlx5_flow_namespace *ns,
+				 struct mlx5_flow_destination *default_dst,
+				 u32 family, u32 level, u32 prio);
+void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce);
+int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev,
+				 struct mlx5_ipsec_fs *ipsec_roce,
+				 struct mlx5_flow_table *pol_ft);
+void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce);
+struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev);
+
+#endif /* __MLX5_LIB_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c
new file mode 100644
index 0000000000..4a078113e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c
@@ -0,0 +1,2411 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/macsec.h>
+#include <linux/mlx5/qp.h>
+#include <linux/if_vlan.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/macsec.h>
+#include "fs_core.h"
+#include "lib/macsec_fs.h"
+#include "mlx5_core.h"
+
+/* MACsec TX flow steering */
+#define CRYPTO_NUM_MAXSEC_FTE BIT(15)
+#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1
+
+#define TX_CRYPTO_TABLE_LEVEL 0
+#define TX_CRYPTO_TABLE_NUM_GROUPS 3
+#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1
+#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \
+	(CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \
+				  CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE))
+#define TX_CHECK_TABLE_LEVEL 1
+#define TX_CHECK_TABLE_NUM_FTE 2
+#define RX_CRYPTO_TABLE_LEVEL 0
+#define RX_CHECK_TABLE_LEVEL 1
+#define RX_ROCE_TABLE_LEVEL 2
+#define RX_CHECK_TABLE_NUM_FTE 3
+#define RX_ROCE_TABLE_NUM_FTE 2
+#define RX_CRYPTO_TABLE_NUM_GROUPS 3
+#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \
+	((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2)
+#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \
+	(CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE)
+#define RX_NUM_OF_RULES_PER_SA 2
+
+#define RDMA_RX_ROCE_IP_TABLE_LEVEL 0
+#define RDMA_RX_ROCE_MACSEC_OP_TABLE_LEVEL 1
+
+#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23
+#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5
+#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET)
+#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8
+#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN)
+
+/* MACsec RX flow steering */
+#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E
+
+/* MACsec fs_id handling for steering */
+#define macsec_fs_set_tx_fs_id(fs_id) (MLX5_ETH_WQE_FT_META_MACSEC | (fs_id) << 2)
+#define macsec_fs_set_rx_fs_id(fs_id) ((fs_id) | BIT(30))
+
+struct mlx5_sectag_header {
+	__be16 ethertype;
+	u8 tci_an;
+	u8 sl;
+	u32 pn;
+	u8 sci[MACSEC_SCI_LEN]; /* optional */
+}  __packed;
+
+struct mlx5_roce_macsec_tx_rule {
+	u32 fs_id;
+	u16 gid_idx;
+	struct list_head entry;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_modify_hdr *meta_modhdr;
+};
+
+struct mlx5_macsec_tx_rule {
+	struct mlx5_flow_handle *rule;
+	struct mlx5_pkt_reformat *pkt_reformat;
+	u32 fs_id;
+};
+
+struct mlx5_macsec_flow_table {
+	int num_groups;
+	struct mlx5_flow_table *t;
+	struct mlx5_flow_group **g;
+};
+
+struct mlx5_macsec_tables {
+	struct mlx5_macsec_flow_table ft_crypto;
+	struct mlx5_flow_handle *crypto_miss_rule;
+
+	struct mlx5_flow_table *ft_check;
+	struct mlx5_flow_group  *ft_check_group;
+	struct mlx5_fc *check_miss_rule_counter;
+	struct mlx5_flow_handle *check_miss_rule;
+	struct mlx5_fc *check_rule_counter;
+
+	u32 refcnt;
+};
+
+struct mlx5_fs_id {
+	u32 id;
+	refcount_t refcnt;
+	sci_t sci;
+	struct rhash_head hash;
+};
+
+struct mlx5_macsec_device {
+	struct list_head macsec_devices_list_entry;
+	void *macdev;
+	struct xarray tx_id_xa;
+	struct xarray rx_id_xa;
+};
+
+struct mlx5_macsec_tx {
+	struct mlx5_flow_handle *crypto_mke_rule;
+	struct mlx5_flow_handle *check_rule;
+
+	struct ida tx_halloc;
+
+	struct mlx5_macsec_tables tables;
+
+	struct mlx5_flow_table *ft_rdma_tx;
+};
+
+struct mlx5_roce_macsec_rx_rule {
+	u32 fs_id;
+	u16 gid_idx;
+	struct mlx5_flow_handle *op;
+	struct mlx5_flow_handle *ip;
+	struct list_head entry;
+};
+
+struct mlx5_macsec_rx_rule {
+	struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA];
+	struct mlx5_modify_hdr *meta_modhdr;
+};
+
+struct mlx5_macsec_miss {
+	struct mlx5_flow_group *g;
+	struct mlx5_flow_handle *rule;
+};
+
+struct mlx5_macsec_rx_roce {
+	/* Flow table/rules in NIC domain, to check if it's a RoCE packet */
+	struct mlx5_flow_group *g;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_modify_hdr *copy_modify_hdr;
+	struct mlx5_macsec_miss nic_miss;
+
+	/* Flow table/rule in RDMA domain, to check dgid */
+	struct mlx5_flow_table *ft_ip_check;
+	struct mlx5_flow_table *ft_macsec_op_check;
+	struct mlx5_macsec_miss miss;
+};
+
+struct mlx5_macsec_rx {
+	struct mlx5_flow_handle *check_rule[2];
+	struct mlx5_pkt_reformat *check_rule_pkt_reformat[2];
+
+	struct mlx5_macsec_tables tables;
+	struct mlx5_macsec_rx_roce roce;
+};
+
+union mlx5_macsec_rule {
+	struct mlx5_macsec_tx_rule tx_rule;
+	struct mlx5_macsec_rx_rule rx_rule;
+};
+
+static const struct rhashtable_params rhash_sci = {
+	.key_len = sizeof_field(struct mlx5_fs_id, sci),
+	.key_offset = offsetof(struct mlx5_fs_id, sci),
+	.head_offset = offsetof(struct mlx5_fs_id, hash),
+	.automatic_shrinking = true,
+	.min_size = 1,
+};
+
+static const struct rhashtable_params rhash_fs_id = {
+	.key_len = sizeof_field(struct mlx5_fs_id, id),
+	.key_offset = offsetof(struct mlx5_fs_id, id),
+	.head_offset = offsetof(struct mlx5_fs_id, hash),
+	.automatic_shrinking = true,
+	.min_size = 1,
+};
+
+struct mlx5_macsec_fs {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_macsec_tx *tx_fs;
+	struct mlx5_macsec_rx *rx_fs;
+
+	/* Stats manage */
+	struct mlx5_macsec_stats stats;
+
+	/* Tx sci -> fs id mapping handling */
+	struct rhashtable sci_hash;      /* sci -> mlx5_fs_id */
+
+	/* RX fs_id -> mlx5_fs_id mapping handling */
+	struct rhashtable fs_id_hash;      /* fs_id -> mlx5_fs_id */
+
+	/* TX & RX fs_id lists per macsec device */
+	struct list_head macsec_devices_list;
+};
+
+static void macsec_fs_destroy_groups(struct mlx5_macsec_flow_table *ft)
+{
+	int i;
+
+	for (i = ft->num_groups - 1; i >= 0; i--) {
+		if (!IS_ERR_OR_NULL(ft->g[i]))
+			mlx5_destroy_flow_group(ft->g[i]);
+		ft->g[i] = NULL;
+	}
+	ft->num_groups = 0;
+}
+
+static void macsec_fs_destroy_flow_table(struct mlx5_macsec_flow_table *ft)
+{
+	macsec_fs_destroy_groups(ft);
+	kfree(ft->g);
+	mlx5_destroy_flow_table(ft->t);
+	ft->t = NULL;
+}
+
+static void macsec_fs_tx_destroy(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	struct mlx5_macsec_tables *tx_tables;
+
+	if (mlx5_is_macsec_roce_supported(macsec_fs->mdev))
+		mlx5_destroy_flow_table(tx_fs->ft_rdma_tx);
+
+	tx_tables = &tx_fs->tables;
+
+	/* Tx check table */
+	if (tx_fs->check_rule) {
+		mlx5_del_flow_rules(tx_fs->check_rule);
+		tx_fs->check_rule = NULL;
+	}
+
+	if (tx_tables->check_miss_rule) {
+		mlx5_del_flow_rules(tx_tables->check_miss_rule);
+		tx_tables->check_miss_rule = NULL;
+	}
+
+	if (tx_tables->ft_check_group) {
+		mlx5_destroy_flow_group(tx_tables->ft_check_group);
+		tx_tables->ft_check_group = NULL;
+	}
+
+	if (tx_tables->ft_check) {
+		mlx5_destroy_flow_table(tx_tables->ft_check);
+		tx_tables->ft_check = NULL;
+	}
+
+	/* Tx crypto table */
+	if (tx_fs->crypto_mke_rule) {
+		mlx5_del_flow_rules(tx_fs->crypto_mke_rule);
+		tx_fs->crypto_mke_rule = NULL;
+	}
+
+	if (tx_tables->crypto_miss_rule) {
+		mlx5_del_flow_rules(tx_tables->crypto_miss_rule);
+		tx_tables->crypto_miss_rule = NULL;
+	}
+
+	macsec_fs_destroy_flow_table(&tx_tables->ft_crypto);
+}
+
+static int macsec_fs_tx_create_crypto_table_groups(struct mlx5_macsec_flow_table *ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+	if (!ft->g)
+		return -ENOMEM;
+	in = kvzalloc(inlen, GFP_KERNEL);
+
+	if (!in) {
+		kfree(ft->g);
+		ft->g = NULL;
+		return -ENOMEM;
+	}
+
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	/* Flow Group for MKE match */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	/* Flow Group for SA rules */
+	memset(in, 0, inlen);
+	memset(mc, 0, mclen);
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+	MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a,
+		 MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	/* Flow Group for l2 traps */
+	memset(in, 0, inlen);
+	memset(mc, 0, mclen);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	kvfree(in);
+	return 0;
+
+err:
+	err = PTR_ERR(ft->g[ft->num_groups]);
+	ft->g[ft->num_groups] = NULL;
+	kvfree(in);
+
+	return err;
+}
+
+static struct mlx5_flow_table
+	*macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags,
+					   int level, int max_fte)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_table *fdb = NULL;
+
+	/* reserve entry for the match all miss group and rule */
+	ft_attr.autogroup.num_reserved_entries = 1;
+	ft_attr.autogroup.max_num_groups = 1;
+	ft_attr.prio = 0;
+	ft_attr.flags = flags;
+	ft_attr.level = level;
+	ft_attr.max_fte = max_fte;
+
+	fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+
+	return fdb;
+}
+
+enum {
+	RDMA_TX_MACSEC_LEVEL = 0,
+};
+
+static int macsec_fs_tx_roce_create(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *ft;
+	int err;
+
+	if (!mlx5_is_macsec_roce_supported(mdev)) {
+		mlx5_core_dbg(mdev, "Failed to init RoCE MACsec, capabilities not supported\n");
+		return 0;
+	}
+
+	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC);
+	if (!ns)
+		return -ENOMEM;
+
+	/* Tx RoCE crypto table  */
+	ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_TX_MACSEC_LEVEL, CRYPTO_NUM_MAXSEC_FTE);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		mlx5_core_err(mdev, "Failed to create MACsec RoCE Tx crypto table err(%d)\n", err);
+		return err;
+	}
+	tx_fs->ft_rdma_tx = ft;
+
+	return 0;
+}
+
+static int macsec_fs_tx_create(struct mlx5_macsec_fs *macsec_fs)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_macsec_tables *tx_tables;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_macsec_flow_table *ft_crypto;
+	struct mlx5_flow_table *flow_table;
+	struct mlx5_flow_group *flow_group;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	u32 *flow_group_in;
+	int err;
+
+	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+	if (!ns)
+		return -ENOMEM;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in) {
+		err = -ENOMEM;
+		goto out_spec;
+	}
+
+	tx_tables = &tx_fs->tables;
+	ft_crypto = &tx_tables->ft_crypto;
+
+	/* Tx crypto table  */
+	ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+	ft_attr.level = TX_CRYPTO_TABLE_LEVEL;
+	ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+	flow_table = mlx5_create_flow_table(ns, &ft_attr);
+	if (IS_ERR(flow_table)) {
+		err = PTR_ERR(flow_table);
+		mlx5_core_err(mdev, "Failed to create MACsec Tx crypto table err(%d)\n", err);
+		goto out_flow_group;
+	}
+	ft_crypto->t = flow_table;
+
+	/* Tx crypto table groups */
+	err = macsec_fs_tx_create_crypto_table_groups(ft_crypto);
+	if (err) {
+		mlx5_core_err(mdev,
+			      "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+			      err);
+		goto err;
+	}
+
+	/* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE);
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+	rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add MACsec TX MKE rule, err=%d\n", err);
+		goto err;
+	}
+	tx_fs->crypto_mke_rule = rule;
+
+	/* Tx crypto table Default miss rule */
+	memset(&flow_act, 0, sizeof(flow_act));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add MACsec Tx table default miss rule %d\n", err);
+		goto err;
+	}
+	tx_tables->crypto_miss_rule = rule;
+
+	/* Tx check table */
+	flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL,
+						       TX_CHECK_TABLE_NUM_FTE);
+	if (IS_ERR(flow_table)) {
+		err = PTR_ERR(flow_table);
+		mlx5_core_err(mdev, "Fail to create MACsec TX check table, err(%d)\n", err);
+		goto err;
+	}
+	tx_tables->ft_check = flow_table;
+
+	/* Tx check table Default miss group/rule */
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+	flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in);
+	if (IS_ERR(flow_group)) {
+		err = PTR_ERR(flow_group);
+		mlx5_core_err(mdev,
+			      "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+			      err);
+		goto err;
+	}
+	tx_tables->ft_check_group = flow_group;
+
+	/* Tx check table default drop rule */
+	memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+	memset(&flow_act, 0, sizeof(flow_act));
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter);
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+	rule = mlx5_add_flow_rules(tx_tables->ft_check,  NULL, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to added MACsec tx check drop rule, err(%d)\n", err);
+		goto err;
+	}
+	tx_tables->check_miss_rule = rule;
+
+	/* Tx check table rule */
+	memset(spec, 0, sizeof(struct mlx5_flow_spec));
+	memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+	memset(&flow_act, 0, sizeof(flow_act));
+
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+	flow_act.flags = FLOW_ACT_NO_APPEND;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter);
+	rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add MACsec check rule, err=%d\n", err);
+		goto err;
+	}
+	tx_fs->check_rule = rule;
+
+	err = macsec_fs_tx_roce_create(macsec_fs);
+	if (err)
+		goto err;
+
+	kvfree(flow_group_in);
+	kvfree(spec);
+	return 0;
+
+err:
+	macsec_fs_tx_destroy(macsec_fs);
+out_flow_group:
+	kvfree(flow_group_in);
+out_spec:
+	kvfree(spec);
+	return err;
+}
+
+static int macsec_fs_tx_ft_get(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	struct mlx5_macsec_tables *tx_tables;
+	int err = 0;
+
+	tx_tables = &tx_fs->tables;
+	if (tx_tables->refcnt)
+		goto out;
+
+	err = macsec_fs_tx_create(macsec_fs);
+	if (err)
+		return err;
+
+out:
+	tx_tables->refcnt++;
+	return err;
+}
+
+static void macsec_fs_tx_ft_put(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+
+	if (--tx_tables->refcnt)
+		return;
+
+	macsec_fs_tx_destroy(macsec_fs);
+}
+
+static int macsec_fs_tx_setup_fte(struct mlx5_macsec_fs *macsec_fs,
+				  struct mlx5_flow_spec *spec,
+				  struct mlx5_flow_act *flow_act,
+				  u32 macsec_obj_id,
+				  u32 *fs_id)
+{
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	int err = 0;
+	u32 id;
+
+	err = ida_alloc_range(&tx_fs->tx_halloc, 1,
+			      MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES,
+			      GFP_KERNEL);
+	if (err < 0)
+		return err;
+
+	id = err;
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+	/* Metadata match */
+	MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+		 MLX5_ETH_WQE_FT_META_MACSEC_MASK);
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+		 macsec_fs_set_tx_fs_id(id));
+
+	*fs_id = id;
+	flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+	flow_act->crypto.obj_id = macsec_obj_id;
+
+	mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id);
+	return 0;
+}
+
+static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx,
+					      char *reformatbf,
+					      size_t *reformat_size)
+{
+	const struct macsec_secy *secy = ctx->secy;
+	bool sci_present = macsec_send_sci(secy);
+	struct mlx5_sectag_header sectag = {};
+	const struct macsec_tx_sc *tx_sc;
+
+	tx_sc = &secy->tx_sc;
+	sectag.ethertype = htons(ETH_P_MACSEC);
+
+	if (sci_present) {
+		sectag.tci_an |= MACSEC_TCI_SC;
+		memcpy(&sectag.sci, &secy->sci,
+		       sizeof(sectag.sci));
+	} else {
+		if (tx_sc->end_station)
+			sectag.tci_an |= MACSEC_TCI_ES;
+		if (tx_sc->scb)
+			sectag.tci_an |= MACSEC_TCI_SCB;
+	}
+
+	/* With GCM, C/E clear for !encrypt, both set for encrypt */
+	if (tx_sc->encrypt)
+		sectag.tci_an |= MACSEC_TCI_CONFID;
+	else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN)
+		sectag.tci_an |= MACSEC_TCI_C;
+
+	sectag.tci_an |= tx_sc->encoding_sa;
+
+	*reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0);
+
+	memcpy(reformatbf, &sectag, *reformat_size);
+}
+
+static bool macsec_fs_is_macsec_device_empty(struct mlx5_macsec_device *macsec_device)
+{
+	if (xa_empty(&macsec_device->tx_id_xa) &&
+	    xa_empty(&macsec_device->rx_id_xa))
+		return true;
+
+	return false;
+}
+
+static void macsec_fs_id_del(struct list_head *macsec_devices_list, u32 fs_id,
+			     void *macdev, struct rhashtable *hash_table, bool is_tx)
+{
+	const struct rhashtable_params *rhash = (is_tx) ? &rhash_sci : &rhash_fs_id;
+	struct mlx5_macsec_device *iter, *macsec_device = NULL;
+	struct mlx5_fs_id *fs_id_found;
+	struct xarray *fs_id_xa;
+
+	list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) {
+		if (iter->macdev == macdev) {
+			macsec_device = iter;
+			break;
+		}
+	}
+	WARN_ON(!macsec_device);
+
+	fs_id_xa = (is_tx) ? &macsec_device->tx_id_xa :
+			     &macsec_device->rx_id_xa;
+	xa_lock(fs_id_xa);
+	fs_id_found = xa_load(fs_id_xa, fs_id);
+	WARN_ON(!fs_id_found);
+
+	if (!refcount_dec_and_test(&fs_id_found->refcnt)) {
+		xa_unlock(fs_id_xa);
+		return;
+	}
+
+	if (fs_id_found->id) {
+		/* Make sure ongoing datapath readers sees a valid SA */
+		rhashtable_remove_fast(hash_table, &fs_id_found->hash, *rhash);
+		fs_id_found->id = 0;
+	}
+	xa_unlock(fs_id_xa);
+
+	xa_erase(fs_id_xa, fs_id);
+
+	kfree(fs_id_found);
+
+	if (macsec_fs_is_macsec_device_empty(macsec_device)) {
+		list_del(&macsec_device->macsec_devices_list_entry);
+		kfree(macsec_device);
+	}
+}
+
+static int macsec_fs_id_add(struct list_head *macsec_devices_list, u32 fs_id,
+			    void *macdev, struct rhashtable *hash_table, sci_t sci,
+			    bool is_tx)
+{
+	const struct rhashtable_params *rhash = (is_tx) ? &rhash_sci : &rhash_fs_id;
+	struct mlx5_macsec_device *iter, *macsec_device = NULL;
+	struct mlx5_fs_id *fs_id_iter;
+	struct xarray *fs_id_xa;
+	int err;
+
+	if (!is_tx) {
+		rcu_read_lock();
+		fs_id_iter = rhashtable_lookup(hash_table, &fs_id, rhash_fs_id);
+		if (fs_id_iter) {
+			refcount_inc(&fs_id_iter->refcnt);
+			rcu_read_unlock();
+			return 0;
+		}
+		rcu_read_unlock();
+	}
+
+	fs_id_iter = kzalloc(sizeof(*fs_id_iter), GFP_KERNEL);
+	if (!fs_id_iter)
+		return -ENOMEM;
+
+	list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) {
+		if (iter->macdev == macdev) {
+			macsec_device = iter;
+			break;
+		}
+	}
+
+	if (!macsec_device) { /* first time adding a SA to that device */
+		macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL);
+		if (!macsec_device) {
+			err = -ENOMEM;
+			goto err_alloc_dev;
+		}
+		macsec_device->macdev = macdev;
+		xa_init(&macsec_device->tx_id_xa);
+		xa_init(&macsec_device->rx_id_xa);
+		list_add(&macsec_device->macsec_devices_list_entry, macsec_devices_list);
+	}
+
+	fs_id_xa = (is_tx) ? &macsec_device->tx_id_xa :
+			     &macsec_device->rx_id_xa;
+	fs_id_iter->id = fs_id;
+	refcount_set(&fs_id_iter->refcnt, 1);
+	fs_id_iter->sci = sci;
+	err = xa_err(xa_store(fs_id_xa, fs_id, fs_id_iter, GFP_KERNEL));
+	if (err)
+		goto err_store_id;
+
+	err = rhashtable_insert_fast(hash_table, &fs_id_iter->hash, *rhash);
+	if (err)
+		goto err_hash_insert;
+
+	return 0;
+
+err_hash_insert:
+	xa_erase(fs_id_xa, fs_id);
+err_store_id:
+	if (macsec_fs_is_macsec_device_empty(macsec_device)) {
+		list_del(&macsec_device->macsec_devices_list_entry);
+		kfree(macsec_device);
+	}
+err_alloc_dev:
+	kfree(fs_id_iter);
+	return err;
+}
+
+static void macsec_fs_tx_del_rule(struct mlx5_macsec_fs *macsec_fs,
+				  struct mlx5_macsec_tx_rule *tx_rule,
+				  void *macdev)
+{
+	macsec_fs_id_del(&macsec_fs->macsec_devices_list, tx_rule->fs_id, macdev,
+			 &macsec_fs->sci_hash, true);
+
+	if (tx_rule->rule) {
+		mlx5_del_flow_rules(tx_rule->rule);
+		tx_rule->rule = NULL;
+	}
+
+	if (tx_rule->pkt_reformat) {
+		mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat);
+		tx_rule->pkt_reformat = NULL;
+	}
+
+	if (tx_rule->fs_id) {
+		ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id);
+		tx_rule->fs_id = 0;
+	}
+
+	kfree(tx_rule);
+
+	macsec_fs_tx_ft_put(macsec_fs);
+}
+
+#define MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES 1
+
+static union mlx5_macsec_rule *
+macsec_fs_tx_add_rule(struct mlx5_macsec_fs *macsec_fs,
+		      const struct macsec_context *macsec_ctx,
+		      struct mlx5_macsec_rule_attrs *attrs, u32 *fs_id)
+{
+	char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN];
+	struct mlx5_pkt_reformat_params reformat_params = {};
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	union mlx5_macsec_rule *macsec_rule = NULL;
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_macsec_tables *tx_tables;
+	struct mlx5_macsec_tx_rule *tx_rule;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	size_t reformat_size;
+	int err = 0;
+
+	tx_tables = &tx_fs->tables;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return NULL;
+
+	err = macsec_fs_tx_ft_get(macsec_fs);
+	if (err)
+		goto out_spec;
+
+	macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+	if (!macsec_rule) {
+		macsec_fs_tx_ft_put(macsec_fs);
+		goto out_spec;
+	}
+
+	tx_rule = &macsec_rule->tx_rule;
+
+	/* Tx crypto table crypto rule */
+	macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size);
+
+	reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC;
+	reformat_params.size = reformat_size;
+	reformat_params.data = reformatbf;
+
+	if (is_vlan_dev(macsec_ctx->netdev))
+		reformat_params.param_0 = MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES;
+
+	flow_act.pkt_reformat = mlx5_packet_reformat_alloc(mdev,
+							   &reformat_params,
+							   MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
+	if (IS_ERR(flow_act.pkt_reformat)) {
+		err = PTR_ERR(flow_act.pkt_reformat);
+		mlx5_core_err(mdev, "Failed to allocate MACsec Tx reformat context err=%d\n",  err);
+		goto err;
+	}
+	tx_rule->pkt_reformat = flow_act.pkt_reformat;
+
+	err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, fs_id);
+	if (err) {
+		mlx5_core_err(mdev,
+			      "Failed to add packet reformat for MACsec TX crypto rule, err=%d\n",
+			      err);
+		goto err;
+	}
+
+	tx_rule->fs_id = *fs_id;
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+			  MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT |
+			  MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = tx_tables->ft_check;
+	rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add MACsec TX crypto rule, err=%d\n", err);
+		goto err;
+	}
+	tx_rule->rule = rule;
+
+	err = macsec_fs_id_add(&macsec_fs->macsec_devices_list, *fs_id, macsec_ctx->secy->netdev,
+			       &macsec_fs->sci_hash, attrs->sci, true);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to save fs_id, err=%d\n", err);
+		goto err;
+	}
+
+	goto out_spec;
+
+err:
+	macsec_fs_tx_del_rule(macsec_fs, tx_rule, macsec_ctx->secy->netdev);
+	macsec_rule = NULL;
+out_spec:
+	kvfree(spec);
+
+	return macsec_rule;
+}
+
+static void macsec_fs_tx_cleanup(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_macsec_tables *tx_tables;
+
+	if (!tx_fs)
+		return;
+
+	tx_tables = &tx_fs->tables;
+	if (tx_tables->refcnt) {
+		mlx5_core_err(mdev,
+			      "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n",
+			      tx_tables->refcnt);
+		return;
+	}
+
+	ida_destroy(&tx_fs->tx_halloc);
+
+	if (tx_tables->check_miss_rule_counter) {
+		mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter);
+		tx_tables->check_miss_rule_counter = NULL;
+	}
+
+	if (tx_tables->check_rule_counter) {
+		mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+		tx_tables->check_rule_counter = NULL;
+	}
+
+	kfree(tx_fs);
+	macsec_fs->tx_fs = NULL;
+}
+
+static int macsec_fs_tx_init(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_macsec_tables *tx_tables;
+	struct mlx5_macsec_tx *tx_fs;
+	struct mlx5_fc *flow_counter;
+	int err;
+
+	tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL);
+	if (!tx_fs)
+		return -ENOMEM;
+
+	tx_tables = &tx_fs->tables;
+
+	flow_counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(flow_counter)) {
+		err = PTR_ERR(flow_counter);
+		mlx5_core_err(mdev,
+			      "Failed to create MACsec Tx encrypt flow counter, err(%d)\n",
+			      err);
+		goto err_encrypt_counter;
+	}
+	tx_tables->check_rule_counter = flow_counter;
+
+	flow_counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(flow_counter)) {
+		err = PTR_ERR(flow_counter);
+		mlx5_core_err(mdev,
+			      "Failed to create MACsec Tx drop flow counter, err(%d)\n",
+			      err);
+		goto err_drop_counter;
+	}
+	tx_tables->check_miss_rule_counter = flow_counter;
+
+	ida_init(&tx_fs->tx_halloc);
+	INIT_LIST_HEAD(&macsec_fs->macsec_devices_list);
+
+	macsec_fs->tx_fs = tx_fs;
+
+	return 0;
+
+err_drop_counter:
+	mlx5_fc_destroy(mdev, tx_tables->check_rule_counter);
+	tx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+	kfree(tx_fs);
+	macsec_fs->tx_fs = NULL;
+
+	return err;
+}
+
+static void macsec_fs_rx_roce_miss_destroy(struct mlx5_macsec_miss *miss)
+{
+	mlx5_del_flow_rules(miss->rule);
+	mlx5_destroy_flow_group(miss->g);
+}
+
+static void macsec_fs_rdma_rx_destroy(struct mlx5_macsec_rx_roce *roce, struct mlx5_core_dev *mdev)
+{
+	if (!mlx5_is_macsec_roce_supported(mdev))
+		return;
+
+	mlx5_del_flow_rules(roce->nic_miss.rule);
+	mlx5_del_flow_rules(roce->rule);
+	mlx5_modify_header_dealloc(mdev, roce->copy_modify_hdr);
+	mlx5_destroy_flow_group(roce->nic_miss.g);
+	mlx5_destroy_flow_group(roce->g);
+	mlx5_destroy_flow_table(roce->ft);
+
+	macsec_fs_rx_roce_miss_destroy(&roce->miss);
+	mlx5_destroy_flow_table(roce->ft_macsec_op_check);
+	mlx5_destroy_flow_table(roce->ft_ip_check);
+}
+
+static void macsec_fs_rx_destroy(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs;
+	struct mlx5_macsec_tables *rx_tables;
+	int i;
+
+	/* Rx check table */
+	for (i = 1; i >= 0; --i) {
+		if (rx_fs->check_rule[i]) {
+			mlx5_del_flow_rules(rx_fs->check_rule[i]);
+			rx_fs->check_rule[i] = NULL;
+		}
+
+		if (rx_fs->check_rule_pkt_reformat[i]) {
+			mlx5_packet_reformat_dealloc(macsec_fs->mdev,
+						     rx_fs->check_rule_pkt_reformat[i]);
+			rx_fs->check_rule_pkt_reformat[i] = NULL;
+		}
+	}
+
+	rx_tables = &rx_fs->tables;
+
+	if (rx_tables->check_miss_rule) {
+		mlx5_del_flow_rules(rx_tables->check_miss_rule);
+		rx_tables->check_miss_rule = NULL;
+	}
+
+	if (rx_tables->ft_check_group) {
+		mlx5_destroy_flow_group(rx_tables->ft_check_group);
+		rx_tables->ft_check_group = NULL;
+	}
+
+	if (rx_tables->ft_check) {
+		mlx5_destroy_flow_table(rx_tables->ft_check);
+		rx_tables->ft_check = NULL;
+	}
+
+	/* Rx crypto table */
+	if (rx_tables->crypto_miss_rule) {
+		mlx5_del_flow_rules(rx_tables->crypto_miss_rule);
+		rx_tables->crypto_miss_rule = NULL;
+	}
+
+	macsec_fs_destroy_flow_table(&rx_tables->ft_crypto);
+
+	macsec_fs_rdma_rx_destroy(&macsec_fs->rx_fs->roce, macsec_fs->mdev);
+}
+
+static int macsec_fs_rx_create_crypto_table_groups(struct mlx5_macsec_flow_table *ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int mclen = MLX5_ST_SZ_BYTES(fte_match_param);
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+	if (!ft->g)
+		return -ENOMEM;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		kfree(ft->g);
+		return -ENOMEM;
+	}
+
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	/* Flow group for SA rule with SCI */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+						MLX5_MATCH_MISC_PARAMETERS_5);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+	MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+		 MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK <<
+		 MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+	MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2);
+	MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3);
+
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	/* Flow group for SA rule without SCI */
+	memset(in, 0, inlen);
+	memset(mc, 0, mclen);
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+						MLX5_MATCH_MISC_PARAMETERS_5);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+	MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0,
+		 MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	/* Flow Group for l2 traps */
+	memset(in, 0, inlen);
+	memset(mc, 0, mclen);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	kvfree(in);
+	return 0;
+
+err:
+	err = PTR_ERR(ft->g[ft->num_groups]);
+	ft->g[ft->num_groups] = NULL;
+	kvfree(in);
+
+	return err;
+}
+
+static int macsec_fs_rx_create_check_decap_rule(struct mlx5_macsec_fs *macsec_fs,
+						struct mlx5_flow_destination *dest,
+						struct mlx5_flow_act *flow_act,
+						struct mlx5_flow_spec *spec,
+						int reformat_param_size)
+{
+	int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 0 : 1;
+	u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI];
+	struct mlx5_pkt_reformat_params reformat_params = {};
+	struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_flow_destination roce_dest[2];
+	struct mlx5_macsec_tables *rx_tables;
+	struct mlx5_flow_handle *rule;
+	int err = 0, dstn = 0;
+
+	rx_tables = &rx_fs->tables;
+
+	/* Rx check table decap 16B rule */
+	memset(dest, 0, sizeof(*dest));
+	memset(flow_act, 0, sizeof(*flow_act));
+	memset(spec, 0, sizeof(*spec));
+
+	reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC;
+	reformat_params.size = reformat_param_size;
+	reformat_params.data = mlx5_reformat_buf;
+	flow_act->pkt_reformat = mlx5_packet_reformat_alloc(mdev,
+							    &reformat_params,
+							    MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+	if (IS_ERR(flow_act->pkt_reformat)) {
+		err = PTR_ERR(flow_act->pkt_reformat);
+		mlx5_core_err(mdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err);
+		return err;
+	}
+	rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat;
+
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+	/* MACsec syndrome match */
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome);
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0);
+	/* ASO return reg syndrome match */
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4);
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0);
+
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+	/* Sectag TCI SC present bit*/
+	MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+		 MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+	if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI)
+		MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+			 MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT <<
+			 MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+	flow_act->flags = FLOW_ACT_NO_APPEND;
+
+	if (rx_fs->roce.ft) {
+		flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+		roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		roce_dest[dstn].ft = rx_fs->roce.ft;
+		dstn++;
+	} else {
+		flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+	}
+
+	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+			    MLX5_FLOW_CONTEXT_ACTION_COUNT;
+	roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	roce_dest[dstn].counter_id = mlx5_fc_id(rx_tables->check_rule_counter);
+	rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, roce_dest, dstn + 1);
+
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add MACsec Rx check rule, err=%d\n", err);
+		return err;
+	}
+
+	rx_fs->check_rule[rule_index] = rule;
+
+	return 0;
+}
+
+static int macsec_fs_rx_roce_miss_create(struct mlx5_core_dev *mdev,
+					 struct mlx5_macsec_rx_roce *roce)
+{
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_group *flow_group;
+	struct mlx5_flow_handle *rule;
+	u32 *flow_group_in;
+	int err;
+
+	flow_group_in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	/* IP check ft has no miss rule since we use default miss action which is go to next PRIO */
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
+		 roce->ft_macsec_op_check->max_fte - 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+		 roce->ft_macsec_op_check->max_fte - 1);
+	flow_group = mlx5_create_flow_group(roce->ft_macsec_op_check, flow_group_in);
+	if (IS_ERR(flow_group)) {
+		err = PTR_ERR(flow_group);
+		mlx5_core_err(mdev,
+			      "Failed to create miss flow group for MACsec RoCE operation check table err(%d)\n",
+			      err);
+		goto err_macsec_op_miss_group;
+	}
+	roce->miss.g = flow_group;
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+	rule = mlx5_add_flow_rules(roce->ft_macsec_op_check,  NULL, &flow_act, NULL, 0);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add miss rule to MACsec RoCE operation check table err(%d)\n",
+			      err);
+		goto err_macsec_op_rule;
+	}
+	roce->miss.rule = rule;
+
+	kvfree(flow_group_in);
+	return 0;
+
+err_macsec_op_rule:
+	mlx5_destroy_flow_group(roce->miss.g);
+err_macsec_op_miss_group:
+	kvfree(flow_group_in);
+	return err;
+}
+
+#define MLX5_RX_ROCE_GROUP_SIZE BIT(0)
+
+static int macsec_fs_rx_roce_jump_to_rdma_groups_create(struct mlx5_core_dev *mdev,
+							struct mlx5_macsec_rx_roce *roce)
+{
+	struct mlx5_flow_group *g;
+	void *outer_headers_c;
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_RX_ROCE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	g = mlx5_create_flow_group(roce->ft, in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		mlx5_core_err(mdev, "Failed to create main flow group for MACsec RoCE NIC UDP table err(%d)\n",
+			      err);
+		goto err_udp_group;
+	}
+	roce->g = g;
+
+	memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in));
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5_RX_ROCE_GROUP_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	g = mlx5_create_flow_group(roce->ft, in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		mlx5_core_err(mdev, "Failed to create miss flow group for MACsec RoCE NIC UDP table err(%d)\n",
+			      err);
+		goto err_udp_miss_group;
+	}
+	roce->nic_miss.g = g;
+
+	kvfree(in);
+	return 0;
+
+err_udp_miss_group:
+	mlx5_destroy_flow_group(roce->g);
+err_udp_group:
+	kvfree(in);
+	return err;
+}
+
+static int macsec_fs_rx_roce_jump_to_rdma_rules_create(struct mlx5_macsec_fs *macsec_fs,
+						       struct mlx5_macsec_rx_roce *roce)
+{
+	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_flow_destination dst = {};
+	struct mlx5_modify_hdr *modify_hdr;
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	int err;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, ROCE_V2_UDP_DPORT);
+
+	MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY);
+	MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+	MLX5_SET(copy_action_in, action, src_offset, 0);
+	MLX5_SET(copy_action_in, action, length, 32);
+	MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_5);
+	MLX5_SET(copy_action_in, action, dst_offset, 0);
+
+	modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC,
+					      1, action);
+
+	if (IS_ERR(modify_hdr)) {
+		err = PTR_ERR(modify_hdr);
+		mlx5_core_err(mdev,
+			      "Failed to alloc macsec copy modify_header_id err(%d)\n", err);
+		goto err_alloc_hdr;
+	}
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	flow_act.modify_hdr = modify_hdr;
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
+	dst.ft = roce->ft_ip_check;
+	rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add rule to MACsec RoCE NIC UDP table err(%d)\n",
+			      err);
+		goto err_add_rule;
+	}
+	roce->rule = rule;
+	roce->copy_modify_hdr = modify_hdr;
+
+	memset(&flow_act, 0, sizeof(flow_act));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+	rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, NULL, 0);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add miss rule to MACsec RoCE NIC UDP table err(%d)\n",
+			      err);
+		goto err_add_rule2;
+	}
+	roce->nic_miss.rule = rule;
+
+	kvfree(spec);
+	return 0;
+
+err_add_rule2:
+	mlx5_del_flow_rules(roce->rule);
+err_add_rule:
+	mlx5_modify_header_dealloc(macsec_fs->mdev, modify_hdr);
+err_alloc_hdr:
+	kvfree(spec);
+	return err;
+}
+
+static int macsec_fs_rx_roce_jump_to_rdma_create(struct mlx5_macsec_fs *macsec_fs,
+						 struct mlx5_macsec_rx_roce *roce)
+{
+	int err;
+
+	err = macsec_fs_rx_roce_jump_to_rdma_groups_create(macsec_fs->mdev, roce);
+	if (err)
+		return err;
+
+	err = macsec_fs_rx_roce_jump_to_rdma_rules_create(macsec_fs, roce);
+	if (err)
+		goto err;
+
+	return 0;
+err:
+	mlx5_destroy_flow_group(roce->nic_miss.g);
+	mlx5_destroy_flow_group(roce->g);
+	return err;
+}
+
+static int macsec_fs_rx_roce_create(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *ft;
+	int err = 0;
+
+	if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev)) {
+		mlx5_core_dbg(mdev, "Failed to init RoCE MACsec, capabilities not supported\n");
+		return 0;
+	}
+
+	ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC);
+	if (!ns)
+		return -ENOMEM;
+
+	ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_RX_ROCE_IP_TABLE_LEVEL,
+					       CRYPTO_NUM_MAXSEC_FTE);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		mlx5_core_err(mdev,
+			      "Failed to create MACsec IP check RoCE table err(%d)\n", err);
+		return err;
+	}
+	rx_fs->roce.ft_ip_check = ft;
+
+	ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_RX_ROCE_MACSEC_OP_TABLE_LEVEL,
+					       CRYPTO_NUM_MAXSEC_FTE);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		mlx5_core_err(mdev,
+			      "Failed to create MACsec operation check RoCE table err(%d)\n",
+			      err);
+		goto err_macsec_op;
+	}
+	rx_fs->roce.ft_macsec_op_check = ft;
+
+	err = macsec_fs_rx_roce_miss_create(mdev, &rx_fs->roce);
+	if (err)
+		goto err_miss_create;
+
+	ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+	if (!ns) {
+		err = -EOPNOTSUPP;
+		goto err_ns;
+	}
+
+	ft_attr.level = RX_ROCE_TABLE_LEVEL;
+	ft_attr.max_fte = RX_ROCE_TABLE_NUM_FTE;
+	ft = mlx5_create_flow_table(ns, &ft_attr);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		mlx5_core_err(mdev,
+			      "Failed to create MACsec jump to RX RoCE, NIC table err(%d)\n", err);
+		goto err_ns;
+	}
+	rx_fs->roce.ft = ft;
+
+	err = macsec_fs_rx_roce_jump_to_rdma_create(macsec_fs, &rx_fs->roce);
+	if (err)
+		goto err_udp_ft;
+
+	return 0;
+
+err_udp_ft:
+	mlx5_destroy_flow_table(rx_fs->roce.ft);
+err_ns:
+	macsec_fs_rx_roce_miss_destroy(&rx_fs->roce.miss);
+err_miss_create:
+	mlx5_destroy_flow_table(rx_fs->roce.ft_macsec_op_check);
+err_macsec_op:
+	mlx5_destroy_flow_table(rx_fs->roce.ft_ip_check);
+	return err;
+}
+
+static int macsec_fs_rx_create(struct mlx5_macsec_fs *macsec_fs)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_macsec_flow_table *ft_crypto;
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_macsec_tables *rx_tables;
+	struct mlx5_flow_table *flow_table;
+	struct mlx5_flow_group *flow_group;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	u32 *flow_group_in;
+	int err;
+
+	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
+	if (!ns)
+		return -ENOMEM;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in) {
+		err = -ENOMEM;
+		goto free_spec;
+	}
+
+	rx_tables = &rx_fs->tables;
+	ft_crypto = &rx_tables->ft_crypto;
+
+	err = macsec_fs_rx_roce_create(macsec_fs);
+	if (err)
+		goto out_flow_group;
+
+	/* Rx crypto table */
+	ft_attr.level = RX_CRYPTO_TABLE_LEVEL;
+	ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE;
+
+	flow_table = mlx5_create_flow_table(ns, &ft_attr);
+	if (IS_ERR(flow_table)) {
+		err = PTR_ERR(flow_table);
+		mlx5_core_err(mdev, "Failed to create MACsec Rx crypto table err(%d)\n", err);
+		goto err;
+	}
+	ft_crypto->t = flow_table;
+
+	/* Rx crypto table groups */
+	err = macsec_fs_rx_create_crypto_table_groups(ft_crypto);
+	if (err) {
+		mlx5_core_err(mdev,
+			      "Failed to create default flow group for MACsec Tx crypto table err(%d)\n",
+			      err);
+		goto err;
+	}
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+	rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev,
+			      "Failed to add MACsec Rx crypto table default miss rule %d\n",
+			      err);
+		goto err;
+	}
+	rx_tables->crypto_miss_rule = rule;
+
+	/* Rx check table */
+	flow_table = macsec_fs_auto_group_table_create(ns,
+						       MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT,
+						       RX_CHECK_TABLE_LEVEL,
+						       RX_CHECK_TABLE_NUM_FTE);
+	if (IS_ERR(flow_table)) {
+		err = PTR_ERR(flow_table);
+		mlx5_core_err(mdev, "Fail to create MACsec RX check table, err(%d)\n", err);
+		goto err;
+	}
+	rx_tables->ft_check = flow_table;
+
+	/* Rx check table Default miss group/rule */
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1);
+	flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in);
+	if (IS_ERR(flow_group)) {
+		err = PTR_ERR(flow_group);
+		mlx5_core_err(mdev,
+			      "Failed to create default flow group for MACsec Rx check table err(%d)\n",
+			      err);
+		goto err;
+	}
+	rx_tables->ft_check_group = flow_group;
+
+	/* Rx check table default drop rule */
+	memset(&flow_act, 0, sizeof(flow_act));
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter);
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+	rule = mlx5_add_flow_rules(rx_tables->ft_check,  NULL, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to added MACsec Rx check drop rule, err(%d)\n", err);
+		goto err;
+	}
+	rx_tables->check_miss_rule = rule;
+
+	/* Rx check table decap rules */
+	err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+						   MLX5_SECTAG_HEADER_SIZE_WITH_SCI);
+	if (err)
+		goto err;
+
+	err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec,
+						   MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI);
+	if (err)
+		goto err;
+
+	goto out_flow_group;
+
+err:
+	macsec_fs_rx_destroy(macsec_fs);
+out_flow_group:
+	kvfree(flow_group_in);
+free_spec:
+	kvfree(spec);
+	return err;
+}
+
+static int macsec_fs_rx_ft_get(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+	int err = 0;
+
+	if (rx_tables->refcnt)
+		goto out;
+
+	err = macsec_fs_rx_create(macsec_fs);
+	if (err)
+		return err;
+
+out:
+	rx_tables->refcnt++;
+	return err;
+}
+
+static void macsec_fs_rx_ft_put(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+
+	if (--rx_tables->refcnt)
+		return;
+
+	macsec_fs_rx_destroy(macsec_fs);
+}
+
+static void macsec_fs_rx_del_rule(struct mlx5_macsec_fs *macsec_fs,
+				  struct mlx5_macsec_rx_rule *rx_rule,
+				  void *macdev, u32 fs_id)
+{
+	int i;
+
+	macsec_fs_id_del(&macsec_fs->macsec_devices_list, fs_id, macdev,
+			 &macsec_fs->fs_id_hash, false);
+
+	for (i = 0; i < RX_NUM_OF_RULES_PER_SA; ++i) {
+		if (rx_rule->rule[i]) {
+			mlx5_del_flow_rules(rx_rule->rule[i]);
+			rx_rule->rule[i] = NULL;
+		}
+	}
+
+	if (rx_rule->meta_modhdr) {
+		mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr);
+		rx_rule->meta_modhdr = NULL;
+	}
+
+	kfree(rx_rule);
+
+	macsec_fs_rx_ft_put(macsec_fs);
+}
+
+static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec,
+				   struct mlx5_flow_act *flow_act,
+				   struct mlx5_macsec_rule_attrs *attrs,
+				   bool sci_present)
+{
+	u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num;
+	struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto;
+	__be32 *sci_p = (__be32 *)(&attrs->sci);
+
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+	/* MACsec ethertype */
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC);
+
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5;
+
+	/* Sectag AN + TCI SC present bit*/
+	MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0,
+		 MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0,
+		 tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET);
+
+	if (sci_present) {
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters_5.macsec_tag_2);
+		MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2,
+			 be32_to_cpu(sci_p[0]));
+
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters_5.macsec_tag_3);
+		MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3,
+			 be32_to_cpu(sci_p[1]));
+	} else {
+		/* When SCI isn't present in the Sectag, need to match the source */
+		/* MAC address only if the SCI contains the default MACsec PORT	  */
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16),
+		       sci_p, ETH_ALEN);
+	}
+
+	crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC;
+	crypto_params->obj_id = attrs->macsec_obj_id;
+}
+
+static union mlx5_macsec_rule *
+macsec_fs_rx_add_rule(struct mlx5_macsec_fs *macsec_fs,
+		      const struct macsec_context *macsec_ctx,
+		      struct mlx5_macsec_rule_attrs *attrs,
+		      u32 fs_id)
+{
+	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+	struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	union mlx5_macsec_rule *macsec_rule = NULL;
+	struct mlx5_modify_hdr *modify_hdr = NULL;
+	struct mlx5_macsec_flow_table *ft_crypto;
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_macsec_tables *rx_tables;
+	struct mlx5_macsec_rx_rule *rx_rule;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return NULL;
+
+	err = macsec_fs_rx_ft_get(macsec_fs);
+	if (err)
+		goto out_spec;
+
+	macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL);
+	if (!macsec_rule) {
+		macsec_fs_rx_ft_put(macsec_fs);
+		goto out_spec;
+	}
+
+	rx_rule = &macsec_rule->rx_rule;
+	rx_tables = &rx_fs->tables;
+	ft_crypto = &rx_tables->ft_crypto;
+
+	/* Set bit[31 - 30] macsec marker - 0x01 */
+	/* Set bit[15-0] fs id */
+	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+	MLX5_SET(set_action_in, action, data, macsec_fs_set_rx_fs_id(fs_id));
+	MLX5_SET(set_action_in, action, offset, 0);
+	MLX5_SET(set_action_in, action, length, 32);
+
+	modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC,
+					      1, action);
+	if (IS_ERR(modify_hdr)) {
+		err = PTR_ERR(modify_hdr);
+		mlx5_core_err(mdev, "Fail to alloc MACsec set modify_header_id err=%d\n", err);
+		modify_hdr = NULL;
+		goto err;
+	}
+	rx_rule->meta_modhdr = modify_hdr;
+
+	/* Rx crypto table with SCI rule */
+	macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true);
+
+	flow_act.modify_hdr = modify_hdr;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+			  MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = rx_tables->ft_check;
+	rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev,
+			      "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n",
+			      err);
+		goto err;
+	}
+	rx_rule->rule[0] = rule;
+
+	/* Rx crypto table without SCI rule */
+	if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) {
+		memset(spec, 0, sizeof(struct mlx5_flow_spec));
+		memset(&dest, 0, sizeof(struct mlx5_flow_destination));
+		memset(&flow_act, 0, sizeof(flow_act));
+
+		macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false);
+
+		flow_act.modify_hdr = modify_hdr;
+		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+				  MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
+				  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		dest.ft = rx_tables->ft_check;
+		rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1);
+		if (IS_ERR(rule)) {
+			err = PTR_ERR(rule);
+			mlx5_core_err(mdev,
+				      "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n",
+				      err);
+			goto err;
+		}
+		rx_rule->rule[1] = rule;
+	}
+
+	err = macsec_fs_id_add(&macsec_fs->macsec_devices_list, fs_id, macsec_ctx->secy->netdev,
+			       &macsec_fs->fs_id_hash, attrs->sci, false);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to save fs_id, err=%d\n", err);
+		goto err;
+	}
+
+	kvfree(spec);
+	return macsec_rule;
+
+err:
+	macsec_fs_rx_del_rule(macsec_fs, rx_rule, macsec_ctx->secy->netdev, fs_id);
+	macsec_rule = NULL;
+out_spec:
+	kvfree(spec);
+	return macsec_rule;
+}
+
+static int macsec_fs_rx_init(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_macsec_tables *rx_tables;
+	struct mlx5_macsec_rx *rx_fs;
+	struct mlx5_fc *flow_counter;
+	int err;
+
+	rx_fs =	kzalloc(sizeof(*rx_fs), GFP_KERNEL);
+	if (!rx_fs)
+		return -ENOMEM;
+
+	flow_counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(flow_counter)) {
+		err = PTR_ERR(flow_counter);
+		mlx5_core_err(mdev,
+			      "Failed to create MACsec Rx encrypt flow counter, err(%d)\n",
+			      err);
+		goto err_encrypt_counter;
+	}
+
+	rx_tables = &rx_fs->tables;
+	rx_tables->check_rule_counter = flow_counter;
+
+	flow_counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(flow_counter)) {
+		err = PTR_ERR(flow_counter);
+		mlx5_core_err(mdev,
+			      "Failed to create MACsec Rx drop flow counter, err(%d)\n",
+			      err);
+		goto err_drop_counter;
+	}
+	rx_tables->check_miss_rule_counter = flow_counter;
+
+	macsec_fs->rx_fs = rx_fs;
+
+	return 0;
+
+err_drop_counter:
+	mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+	rx_tables->check_rule_counter = NULL;
+
+err_encrypt_counter:
+	kfree(rx_fs);
+	macsec_fs->rx_fs = NULL;
+
+	return err;
+}
+
+static void macsec_fs_rx_cleanup(struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_macsec_tables *rx_tables;
+
+	if (!rx_fs)
+		return;
+
+	rx_tables = &rx_fs->tables;
+
+	if (rx_tables->refcnt) {
+		mlx5_core_err(mdev,
+			      "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n",
+			      rx_tables->refcnt);
+		return;
+	}
+
+	if (rx_tables->check_miss_rule_counter) {
+		mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter);
+		rx_tables->check_miss_rule_counter = NULL;
+	}
+
+	if (rx_tables->check_rule_counter) {
+		mlx5_fc_destroy(mdev, rx_tables->check_rule_counter);
+		rx_tables->check_rule_counter = NULL;
+	}
+
+	kfree(rx_fs);
+	macsec_fs->rx_fs = NULL;
+}
+
+static void set_ipaddr_spec_v4(struct sockaddr_in *in, struct mlx5_flow_spec *spec, bool is_dst_ip)
+{
+	MLX5_SET(fte_match_param, spec->match_value,
+		 outer_headers.ip_version, MLX5_FS_IPV4_VERSION);
+
+	if (is_dst_ip) {
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+		       &in->sin_addr.s_addr, 4);
+	} else {
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+		       &in->sin_addr.s_addr, 4);
+	}
+}
+
+static void set_ipaddr_spec_v6(struct sockaddr_in6 *in6, struct mlx5_flow_spec *spec,
+			       bool is_dst_ip)
+{
+	MLX5_SET(fte_match_param, spec->match_value,
+		 outer_headers.ip_version, MLX5_FS_IPV6_VERSION);
+
+	if (is_dst_ip) {
+		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       0xff, 16);
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       &in6->sin6_addr, 16);
+	} else {
+		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       0xff, 16);
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       &in6->sin6_addr, 16);
+	}
+}
+
+static void set_ipaddr_spec(const struct sockaddr *addr,
+			    struct mlx5_flow_spec *spec, bool is_dst_ip)
+{
+	struct sockaddr_in6 *in6;
+
+	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+			 outer_headers.ip_version);
+
+	if (addr->sa_family == AF_INET) {
+		struct sockaddr_in *in = (struct sockaddr_in *)addr;
+
+		set_ipaddr_spec_v4(in, spec, is_dst_ip);
+		return;
+	}
+
+	in6 = (struct sockaddr_in6 *)addr;
+	set_ipaddr_spec_v6(in6, spec, is_dst_ip);
+}
+
+static void macsec_fs_del_roce_rule_rx(struct mlx5_roce_macsec_rx_rule *rx_rule)
+{
+	mlx5_del_flow_rules(rx_rule->op);
+	mlx5_del_flow_rules(rx_rule->ip);
+	list_del(&rx_rule->entry);
+	kfree(rx_rule);
+}
+
+static void macsec_fs_del_roce_rules_rx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id,
+					struct list_head *rx_rules_list)
+{
+	struct mlx5_roce_macsec_rx_rule *rx_rule, *next;
+
+	if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev))
+		return;
+
+	list_for_each_entry_safe(rx_rule, next, rx_rules_list, entry) {
+		if (rx_rule->fs_id == fs_id)
+			macsec_fs_del_roce_rule_rx(rx_rule);
+	}
+}
+
+static void macsec_fs_del_roce_rule_tx(struct mlx5_core_dev *mdev,
+				       struct mlx5_roce_macsec_tx_rule *tx_rule)
+{
+	mlx5_del_flow_rules(tx_rule->rule);
+	mlx5_modify_header_dealloc(mdev, tx_rule->meta_modhdr);
+	list_del(&tx_rule->entry);
+	kfree(tx_rule);
+}
+
+static void macsec_fs_del_roce_rules_tx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id,
+					struct list_head *tx_rules_list)
+{
+	struct mlx5_roce_macsec_tx_rule *tx_rule, *next;
+
+	if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev))
+		return;
+
+	list_for_each_entry_safe(tx_rule, next, tx_rules_list, entry) {
+		if (tx_rule->fs_id == fs_id)
+			macsec_fs_del_roce_rule_tx(macsec_fs->mdev, tx_rule);
+	}
+}
+
+void mlx5_macsec_fs_get_stats_fill(struct mlx5_macsec_fs *macsec_fs, void *macsec_stats)
+{
+	struct mlx5_macsec_stats *stats = (struct mlx5_macsec_stats *)macsec_stats;
+	struct mlx5_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables;
+	struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+
+	if (tx_tables->check_rule_counter)
+		mlx5_fc_query(mdev, tx_tables->check_rule_counter,
+			      &stats->macsec_tx_pkts, &stats->macsec_tx_bytes);
+
+	if (tx_tables->check_miss_rule_counter)
+		mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter,
+			      &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop);
+
+	if (rx_tables->check_rule_counter)
+		mlx5_fc_query(mdev, rx_tables->check_rule_counter,
+			      &stats->macsec_rx_pkts, &stats->macsec_rx_bytes);
+
+	if (rx_tables->check_miss_rule_counter)
+		mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter,
+			      &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop);
+}
+
+struct mlx5_macsec_stats *mlx5_macsec_fs_get_stats(struct mlx5_macsec_fs *macsec_fs)
+{
+	if (!macsec_fs)
+		return NULL;
+
+	return &macsec_fs->stats;
+}
+
+u32 mlx5_macsec_fs_get_fs_id_from_hashtable(struct mlx5_macsec_fs *macsec_fs, sci_t *sci)
+{
+	struct mlx5_fs_id *mlx5_fs_id;
+	u32 fs_id = 0;
+
+	rcu_read_lock();
+	mlx5_fs_id = rhashtable_lookup(&macsec_fs->sci_hash, sci, rhash_sci);
+	if (mlx5_fs_id)
+		fs_id = mlx5_fs_id->id;
+	rcu_read_unlock();
+
+	return fs_id;
+}
+
+union mlx5_macsec_rule *
+mlx5_macsec_fs_add_rule(struct mlx5_macsec_fs *macsec_fs,
+			const struct macsec_context *macsec_ctx,
+			struct mlx5_macsec_rule_attrs *attrs,
+			u32 *sa_fs_id)
+{
+	struct mlx5_macsec_event_data data = {.macsec_fs = macsec_fs,
+					      .macdev = macsec_ctx->secy->netdev,
+					      .is_tx =
+					      (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT)
+	};
+	union mlx5_macsec_rule *macsec_rule;
+	u32 tx_new_fs_id;
+
+	macsec_rule = (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+		macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, &tx_new_fs_id) :
+		macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id);
+
+	data.fs_id = (data.is_tx) ? tx_new_fs_id : *sa_fs_id;
+	if (macsec_rule)
+		blocking_notifier_call_chain(&macsec_fs->mdev->macsec_nh,
+					     MLX5_DRIVER_EVENT_MACSEC_SA_ADDED,
+					     &data);
+
+	return macsec_rule;
+}
+
+void mlx5_macsec_fs_del_rule(struct mlx5_macsec_fs *macsec_fs,
+			     union mlx5_macsec_rule *macsec_rule,
+			     int action, void *macdev, u32 sa_fs_id)
+{
+	struct mlx5_macsec_event_data data = {.macsec_fs = macsec_fs,
+					      .macdev = macdev,
+					      .is_tx = (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT)
+	};
+
+	data.fs_id = (data.is_tx) ? macsec_rule->tx_rule.fs_id : sa_fs_id;
+	blocking_notifier_call_chain(&macsec_fs->mdev->macsec_nh,
+				     MLX5_DRIVER_EVENT_MACSEC_SA_DELETED,
+				     &data);
+
+	(action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ?
+		macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule, macdev) :
+		macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule, macdev, sa_fs_id);
+}
+
+static int mlx5_macsec_fs_add_roce_rule_rx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, u16 gid_idx,
+					   const struct sockaddr *addr,
+					   struct list_head *rx_rules_list)
+{
+	struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs;
+	struct mlx5_roce_macsec_rx_rule *rx_rule;
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *new_rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	rx_rule = kzalloc(sizeof(*rx_rule), GFP_KERNEL);
+	if (!rx_rule) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	set_ipaddr_spec(addr, spec, true);
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	dest.ft = rx_fs->roce.ft_macsec_op_check;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	new_rule = mlx5_add_flow_rules(rx_fs->roce.ft_ip_check, spec, &flow_act,
+				       &dest, 1);
+	if (IS_ERR(new_rule)) {
+		err = PTR_ERR(new_rule);
+		goto ip_rule_err;
+	}
+	rx_rule->ip = new_rule;
+
+	memset(&flow_act, 0, sizeof(flow_act));
+	memset(spec, 0, sizeof(*spec));
+
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_5);
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_5,
+		 macsec_fs_set_rx_fs_id(fs_id));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	new_rule = mlx5_add_flow_rules(rx_fs->roce.ft_macsec_op_check, spec, &flow_act,
+				       NULL, 0);
+	if (IS_ERR(new_rule)) {
+		err = PTR_ERR(new_rule);
+		goto op_rule_err;
+	}
+	rx_rule->op = new_rule;
+	rx_rule->gid_idx = gid_idx;
+	rx_rule->fs_id = fs_id;
+	list_add_tail(&rx_rule->entry, rx_rules_list);
+
+	goto out;
+
+op_rule_err:
+	mlx5_del_flow_rules(rx_rule->ip);
+	rx_rule->ip = NULL;
+ip_rule_err:
+	kfree(rx_rule);
+out:
+	kvfree(spec);
+	return err;
+}
+
+static int mlx5_macsec_fs_add_roce_rule_tx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, u16 gid_idx,
+					   const struct sockaddr *addr,
+					   struct list_head *tx_rules_list)
+{
+	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+	struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_modify_hdr *modify_hdr = NULL;
+	struct mlx5_roce_macsec_tx_rule *tx_rule;
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *new_rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	tx_rule = kzalloc(sizeof(*tx_rule), GFP_KERNEL);
+	if (!tx_rule) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	set_ipaddr_spec(addr, spec, false);
+
+	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_A);
+	MLX5_SET(set_action_in, action, data, macsec_fs_set_tx_fs_id(fs_id));
+	MLX5_SET(set_action_in, action, offset, 0);
+	MLX5_SET(set_action_in, action, length, 32);
+
+	modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC,
+					      1, action);
+	if (IS_ERR(modify_hdr)) {
+		err = PTR_ERR(modify_hdr);
+		mlx5_core_err(mdev, "Fail to alloc ROCE MACsec set modify_header_id err=%d\n",
+			      err);
+		modify_hdr = NULL;
+		goto modify_hdr_err;
+	}
+	tx_rule->meta_modhdr = modify_hdr;
+
+	flow_act.modify_hdr = modify_hdr;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
+	dest.ft = tx_fs->tables.ft_crypto.t;
+	new_rule = mlx5_add_flow_rules(tx_fs->ft_rdma_tx, spec, &flow_act, &dest, 1);
+	if (IS_ERR(new_rule)) {
+		err = PTR_ERR(new_rule);
+		mlx5_core_err(mdev, "Failed to add ROCE TX rule, err=%d\n", err);
+		goto rule_err;
+	}
+	tx_rule->rule = new_rule;
+	tx_rule->gid_idx = gid_idx;
+	tx_rule->fs_id = fs_id;
+	list_add_tail(&tx_rule->entry, tx_rules_list);
+
+	goto out;
+
+rule_err:
+	mlx5_modify_header_dealloc(mdev, tx_rule->meta_modhdr);
+modify_hdr_err:
+	kfree(tx_rule);
+out:
+	kvfree(spec);
+	return err;
+}
+
+void mlx5_macsec_del_roce_rule(u16 gid_idx, struct mlx5_macsec_fs *macsec_fs,
+			       struct list_head *tx_rules_list, struct list_head *rx_rules_list)
+{
+	struct mlx5_roce_macsec_rx_rule *rx_rule, *next_rx;
+	struct mlx5_roce_macsec_tx_rule *tx_rule, *next_tx;
+
+	list_for_each_entry_safe(tx_rule, next_tx, tx_rules_list, entry) {
+		if (tx_rule->gid_idx == gid_idx)
+			macsec_fs_del_roce_rule_tx(macsec_fs->mdev, tx_rule);
+	}
+
+	list_for_each_entry_safe(rx_rule, next_rx, rx_rules_list, entry) {
+		if (rx_rule->gid_idx == gid_idx)
+			macsec_fs_del_roce_rule_rx(rx_rule);
+	}
+}
+EXPORT_SYMBOL_GPL(mlx5_macsec_del_roce_rule);
+
+int mlx5_macsec_add_roce_rule(void *macdev, const struct sockaddr *addr, u16 gid_idx,
+			      struct list_head *tx_rules_list, struct list_head *rx_rules_list,
+			      struct mlx5_macsec_fs *macsec_fs)
+{
+	struct mlx5_macsec_device *iter, *macsec_device = NULL;
+	struct mlx5_core_dev *mdev = macsec_fs->mdev;
+	struct mlx5_fs_id *fs_id_iter;
+	unsigned long index = 0;
+	int err;
+
+	list_for_each_entry(iter, &macsec_fs->macsec_devices_list, macsec_devices_list_entry) {
+		if (iter->macdev == macdev) {
+			macsec_device = iter;
+			break;
+		}
+	}
+
+	if (!macsec_device)
+		return 0;
+
+	xa_for_each(&macsec_device->tx_id_xa, index, fs_id_iter) {
+		err = mlx5_macsec_fs_add_roce_rule_tx(macsec_fs, fs_id_iter->id, gid_idx, addr,
+						      tx_rules_list);
+		if (err) {
+			mlx5_core_err(mdev, "MACsec offload: Failed to add roce TX rule\n");
+			goto out;
+		}
+	}
+
+	index = 0;
+	xa_for_each(&macsec_device->rx_id_xa, index, fs_id_iter) {
+		err = mlx5_macsec_fs_add_roce_rule_rx(macsec_fs, fs_id_iter->id, gid_idx, addr,
+						      rx_rules_list);
+		if (err) {
+			mlx5_core_err(mdev, "MACsec offload: Failed to add roce TX rule\n");
+			goto out;
+		}
+	}
+
+	return 0;
+out:
+	mlx5_macsec_del_roce_rule(gid_idx, macsec_fs, tx_rules_list, rx_rules_list);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_macsec_add_roce_rule);
+
+void mlx5_macsec_add_roce_sa_rules(u32 fs_id, const struct sockaddr *addr, u16 gid_idx,
+				   struct list_head *tx_rules_list,
+				   struct list_head *rx_rules_list,
+				   struct mlx5_macsec_fs *macsec_fs, bool is_tx)
+{
+	(is_tx) ?
+		mlx5_macsec_fs_add_roce_rule_tx(macsec_fs, fs_id, gid_idx, addr,
+						tx_rules_list) :
+		mlx5_macsec_fs_add_roce_rule_rx(macsec_fs, fs_id, gid_idx, addr,
+						rx_rules_list);
+}
+EXPORT_SYMBOL_GPL(mlx5_macsec_add_roce_sa_rules);
+
+void mlx5_macsec_del_roce_sa_rules(u32 fs_id, struct mlx5_macsec_fs *macsec_fs,
+				   struct list_head *tx_rules_list,
+				   struct list_head *rx_rules_list, bool is_tx)
+{
+	(is_tx) ?
+		macsec_fs_del_roce_rules_tx(macsec_fs, fs_id, tx_rules_list) :
+		macsec_fs_del_roce_rules_rx(macsec_fs, fs_id, rx_rules_list);
+}
+EXPORT_SYMBOL_GPL(mlx5_macsec_del_roce_sa_rules);
+
+void mlx5_macsec_fs_cleanup(struct mlx5_macsec_fs *macsec_fs)
+{
+	macsec_fs_rx_cleanup(macsec_fs);
+	macsec_fs_tx_cleanup(macsec_fs);
+	rhashtable_destroy(&macsec_fs->fs_id_hash);
+	rhashtable_destroy(&macsec_fs->sci_hash);
+	kfree(macsec_fs);
+}
+
+struct mlx5_macsec_fs *
+mlx5_macsec_fs_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_macsec_fs *macsec_fs;
+	int err;
+
+	macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL);
+	if (!macsec_fs)
+		return NULL;
+
+	macsec_fs->mdev = mdev;
+
+	err = rhashtable_init(&macsec_fs->sci_hash, &rhash_sci);
+	if (err) {
+		mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n",
+			      err);
+		goto err_hash;
+	}
+
+	err = rhashtable_init(&macsec_fs->fs_id_hash, &rhash_fs_id);
+	if (err) {
+		mlx5_core_err(mdev, "MACsec offload: Failed to init FS_ID hash table, err=%d\n",
+			      err);
+		goto sci_hash_cleanup;
+	}
+
+	err = macsec_fs_tx_init(macsec_fs);
+	if (err) {
+		mlx5_core_err(mdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err);
+		goto fs_id_hash_cleanup;
+	}
+
+	err = macsec_fs_rx_init(macsec_fs);
+	if (err) {
+		mlx5_core_err(mdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err);
+		goto tx_cleanup;
+	}
+
+	BLOCKING_INIT_NOTIFIER_HEAD(&mdev->macsec_nh);
+
+	return macsec_fs;
+
+tx_cleanup:
+	macsec_fs_tx_cleanup(macsec_fs);
+fs_id_hash_cleanup:
+	rhashtable_destroy(&macsec_fs->fs_id_hash);
+sci_hash_cleanup:
+	rhashtable_destroy(&macsec_fs->sci_hash);
+err_hash:
+	kfree(macsec_fs);
+	return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h
new file mode 100644
index 0000000000..34b80c3ef6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_MACSEC_STEERING_H__
+#define __MLX5_MACSEC_STEERING_H__
+
+#ifdef CONFIG_MLX5_MACSEC
+
+/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */
+#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */
+#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX
+#define MLX5_MACSEC_METADATA_MARKER(metadata)  ((((metadata) >> 30) & 0x3)  == 0x1)
+#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata)  ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK)
+
+#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16
+
+struct mlx5_macsec_fs;
+union mlx5_macsec_rule;
+
+struct mlx5_macsec_rule_attrs {
+	sci_t sci;
+	u32 macsec_obj_id;
+	u8 assoc_num;
+	int action;
+};
+
+struct mlx5_macsec_stats {
+	u64 macsec_rx_pkts;
+	u64 macsec_rx_bytes;
+	u64 macsec_rx_pkts_drop;
+	u64 macsec_rx_bytes_drop;
+	u64 macsec_tx_pkts;
+	u64 macsec_tx_bytes;
+	u64 macsec_tx_pkts_drop;
+	u64 macsec_tx_bytes_drop;
+};
+
+enum mlx5_macsec_action {
+	MLX5_ACCEL_MACSEC_ACTION_ENCRYPT,
+	MLX5_ACCEL_MACSEC_ACTION_DECRYPT,
+};
+
+void mlx5_macsec_fs_cleanup(struct mlx5_macsec_fs *macsec_fs);
+
+struct mlx5_macsec_fs *
+mlx5_macsec_fs_init(struct mlx5_core_dev *mdev);
+
+union mlx5_macsec_rule *
+mlx5_macsec_fs_add_rule(struct mlx5_macsec_fs *macsec_fs,
+			const struct macsec_context *ctx,
+			struct mlx5_macsec_rule_attrs *attrs,
+			u32 *sa_fs_id);
+
+void mlx5_macsec_fs_del_rule(struct mlx5_macsec_fs *macsec_fs,
+			     union mlx5_macsec_rule *macsec_rule,
+			     int action, void *macdev, u32 sa_fs_id);
+
+void mlx5_macsec_fs_get_stats_fill(struct mlx5_macsec_fs *macsec_fs, void *macsec_stats);
+struct mlx5_macsec_stats *mlx5_macsec_fs_get_stats(struct mlx5_macsec_fs *macsec_fs);
+u32 mlx5_macsec_fs_get_fs_id_from_hashtable(struct mlx5_macsec_fs *macsec_fs, sci_t *sci);
+
+#endif
+
+#endif /* __MLX5_MACSEC_STEERING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
new file mode 100644
index 0000000000..2b5826a785
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+#include "mlx5_core.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+int mlx5_crdump_enable(struct mlx5_core_dev *dev);
+void mlx5_crdump_disable(struct mlx5_core_dev *dev);
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
+
+static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
+{
+	return devlink_net(priv_to_devlink(dev));
+}
+
+static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev)
+{
+	return mdev->mlx5e_res.uplink_netdev;
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
new file mode 100644
index 0000000000..4450091e18
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/mpfs.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_core.h"
+#include "lib/mpfs.h"
+
+/* HW L2 Table (MPFS) management */
+static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
+{
+	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {};
+	u8 *in_mac_addr;
+
+	MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
+	MLX5_SET(set_l2_table_entry_in, in, table_index, index);
+
+	in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
+	ether_addr_copy(&in_mac_addr[2], mac);
+
+	return mlx5_cmd_exec_in(dev, set_l2_table_entry, in);
+}
+
+static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
+{
+	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {};
+
+	MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+	MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
+	return mlx5_cmd_exec_in(dev, delete_l2_table_entry, in);
+}
+
+/* UC L2 table hash node */
+struct l2table_node {
+	struct l2addr_node node;
+	u32                index; /* index in HW l2 table */
+	int                ref_count;
+};
+
+struct mlx5_mpfs {
+	struct hlist_head    hash[MLX5_L2_ADDR_HASH_SIZE];
+	struct mutex         lock; /* Synchronize l2 table access */
+	u32                  size;
+	unsigned long        *bitmap;
+};
+
+static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix)
+{
+	int err = 0;
+
+	*ix = find_first_zero_bit(l2table->bitmap, l2table->size);
+	if (*ix >= l2table->size)
+		err = -ENOSPC;
+	else
+		__set_bit(*ix, l2table->bitmap);
+
+	return err;
+}
+
+static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix)
+{
+	__clear_bit(ix, l2table->bitmap);
+}
+
+int mlx5_mpfs_init(struct mlx5_core_dev *dev)
+{
+	int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
+	struct mlx5_mpfs *mpfs;
+
+	if (!MLX5_ESWITCH_MANAGER(dev) || l2table_size == 1)
+		return 0;
+
+	mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
+	if (!mpfs)
+		return -ENOMEM;
+
+	mutex_init(&mpfs->lock);
+	mpfs->size   = l2table_size;
+	mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL);
+	if (!mpfs->bitmap) {
+		kfree(mpfs);
+		return -ENOMEM;
+	}
+
+	dev->priv.mpfs = mpfs;
+	return 0;
+}
+
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+
+	if (!mpfs)
+		return;
+
+	WARN_ON(!hlist_empty(mpfs->hash));
+	bitmap_free(mpfs->bitmap);
+	kfree(mpfs);
+}
+
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+	struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+	struct l2table_node *l2addr;
+	int err = 0;
+	u32 index;
+
+	if (!mpfs)
+		return 0;
+
+	mutex_lock(&mpfs->lock);
+
+	l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+	if (l2addr) {
+		l2addr->ref_count++;
+		goto out;
+	}
+
+	err = alloc_l2table_index(mpfs, &index);
+	if (err)
+		goto out;
+
+	l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
+	if (!l2addr) {
+		err = -ENOMEM;
+		goto hash_add_err;
+	}
+
+	err = set_l2table_entry_cmd(dev, index, mac);
+	if (err)
+		goto set_table_entry_err;
+
+	l2addr->index = index;
+	l2addr->ref_count = 1;
+
+	mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
+	goto out;
+
+set_table_entry_err:
+	l2addr_hash_del(l2addr);
+hash_add_err:
+	free_l2table_index(mpfs, index);
+out:
+	mutex_unlock(&mpfs->lock);
+	return err;
+}
+EXPORT_SYMBOL(mlx5_mpfs_add_mac);
+
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+	struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+	struct l2table_node *l2addr;
+	int err = 0;
+	u32 index;
+
+	if (!mpfs)
+		return 0;
+
+	mutex_lock(&mpfs->lock);
+
+	l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+	if (!l2addr) {
+		err = -ENOENT;
+		goto unlock;
+	}
+
+	if (--l2addr->ref_count > 0)
+		goto unlock;
+
+	index = l2addr->index;
+	del_l2table_entry_cmd(dev, index);
+	l2addr_hash_del(l2addr);
+	free_l2table_index(mpfs, index);
+	mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index);
+unlock:
+	mutex_unlock(&mpfs->lock);
+	return err;
+}
+EXPORT_SYMBOL(mlx5_mpfs_del_mac);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
new file mode 100644
index 0000000000..4a293542a7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_MPFS_H__
+#define __MLX5_MPFS_H__
+
+#include <linux/if_ether.h>
+#include <linux/mlx5/device.h>
+
+/* L2 -mac address based- hash helpers */
+#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
+#define MLX5_L2_ADDR_HASH(addr) (addr[5])
+
+struct l2addr_node {
+	struct hlist_node hlist;
+	u8                addr[ETH_ALEN];
+};
+
+#define for_each_l2hash_node(hn, tmp, hash, i) \
+	for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
+		hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist)
+
+#define l2addr_hash_find(hash, mac, type) ({                \
+	int ix = MLX5_L2_ADDR_HASH(mac);                    \
+	bool found = false;                                 \
+	type *ptr = NULL;                                   \
+							    \
+	hlist_for_each_entry(ptr, &(hash)[ix], node.hlist)  \
+		if (ether_addr_equal(ptr->node.addr, mac)) {\
+			found = true;                       \
+			break;                              \
+		}                                           \
+	if (!found)                                         \
+		ptr = NULL;                                 \
+	ptr;                                                \
+})
+
+#define l2addr_hash_add(hash, mac, type, gfp) ({            \
+	int ix = MLX5_L2_ADDR_HASH(mac);                    \
+	type *ptr = NULL;                                   \
+							    \
+	ptr = kzalloc(sizeof(type), gfp);                   \
+	if (ptr) {                                          \
+		ether_addr_copy(ptr->node.addr, mac);       \
+		hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\
+	}                                                   \
+	ptr;                                                \
+})
+
+#define l2addr_hash_del(ptr) ({                             \
+	hlist_del(&(ptr)->node.hlist);                      \
+	kfree(ptr);                                         \
+})
+
+#ifdef CONFIG_MLX5_MPFS
+int  mlx5_mpfs_init(struct mlx5_core_dev *dev);
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
+#else /* #ifndef CONFIG_MLX5_MPFS */
+static inline int  mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
new file mode 100644
index 0000000000..6b774e0c27
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/pci.h>
+#include "mlx5_core.h"
+#include "pci_vsc.h"
+
+#define MLX5_EXTRACT_C(source, offset, size)	\
+	((((u32)(source)) >> (offset)) & MLX5_ONES32(size))
+#define MLX5_EXTRACT(src, start, len)		\
+	(((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len))
+#define MLX5_ONES32(size)			\
+	((size) ? (0xffffffff >> (32 - (size))) : 0)
+#define MLX5_MASK32(offset, size)		\
+	(MLX5_ONES32(size) << (offset))
+#define MLX5_MERGE_C(rsrc1, rsrc2, start, len)  \
+	((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \
+	((rsrc1) & (~MLX5_MASK32((start), (len)))))
+#define MLX5_MERGE(rsrc1, rsrc2, start, len)	\
+	(((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len))
+#define vsc_read(dev, offset, val) \
+	pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define vsc_write(dev, offset, val) \
+	pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define VSC_MAX_RETRIES 2048
+
+enum {
+	VSC_CTRL_OFFSET = 0x4,
+	VSC_COUNTER_OFFSET = 0x8,
+	VSC_SEMAPHORE_OFFSET = 0xc,
+	VSC_ADDR_OFFSET = 0x10,
+	VSC_DATA_OFFSET = 0x14,
+
+	VSC_FLAG_BIT_OFFS = 31,
+	VSC_FLAG_BIT_LEN = 1,
+
+	VSC_SYND_BIT_OFFS = 30,
+	VSC_SYND_BIT_LEN = 1,
+
+	VSC_ADDR_BIT_OFFS = 0,
+	VSC_ADDR_BIT_LEN = 30,
+
+	VSC_SPACE_BIT_OFFS = 0,
+	VSC_SPACE_BIT_LEN = 16,
+
+	VSC_SIZE_VLD_BIT_OFFS = 28,
+	VSC_SIZE_VLD_BIT_LEN = 1,
+
+	VSC_STATUS_BIT_OFFS = 29,
+	VSC_STATUS_BIT_LEN = 3,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_pf(dev))
+		return;
+
+	dev->vsc_addr = pci_find_capability(dev->pdev,
+					    PCI_CAP_ID_VNDR);
+	if (!dev->vsc_addr)
+		mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n");
+}
+
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
+{
+	u32 counter = 0;
+	int retries = 0;
+	u32 lock_val;
+	int ret;
+
+	pci_cfg_access_lock(dev->pdev);
+	do {
+		if (retries > VSC_MAX_RETRIES) {
+			ret = -EBUSY;
+			goto pci_unlock;
+		}
+
+		/* Check if semaphore is already locked */
+		ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+		if (ret)
+			goto pci_unlock;
+
+		if (lock_val) {
+			retries++;
+			usleep_range(1000, 2000);
+			continue;
+		}
+
+		/* Read and write counter value, if written value is
+		 * the same, semaphore was acquired successfully.
+		 */
+		ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter);
+		if (ret)
+			goto pci_unlock;
+
+		ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter);
+		if (ret)
+			goto pci_unlock;
+
+		ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+		if (ret)
+			goto pci_unlock;
+
+		retries++;
+	} while (counter != lock_val);
+
+	return 0;
+
+pci_unlock:
+	pci_cfg_access_unlock(dev->pdev);
+	return ret;
+}
+
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev)
+{
+	int ret;
+
+	ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK);
+	pci_cfg_access_unlock(dev->pdev);
+	return ret;
+}
+
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+			  u32 *ret_space_size)
+{
+	int ret;
+	u32 val = 0;
+
+	if (!mlx5_vsc_accessible(dev))
+		return -EINVAL;
+
+	if (ret_space_size)
+		*ret_space_size = 0;
+
+	/* Get a unique val */
+	ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+	if (ret)
+		goto out;
+
+	/* Try to modify the lock */
+	val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN);
+	ret = vsc_write(dev, VSC_CTRL_OFFSET, val);
+	if (ret)
+		goto out;
+
+	/* Verify lock was modified */
+	ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+	if (ret)
+		goto out;
+
+	if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0)
+		return -EINVAL;
+
+	/* Get space max address if indicated by size valid bit */
+	if (ret_space_size &&
+	    MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) {
+		ret = vsc_read(dev, VSC_ADDR_OFFSET, &val);
+		if (ret) {
+			mlx5_core_warn(dev, "Failed to get max space size\n");
+			goto out;
+		}
+		*ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS,
+					       VSC_ADDR_BIT_LEN);
+	}
+	return 0;
+
+out:
+	return ret;
+}
+
+static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val)
+{
+	int retries = 0;
+	u32 flag;
+	int ret;
+
+	do {
+		if (retries > VSC_MAX_RETRIES)
+			return -EBUSY;
+
+		ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag);
+		if (ret)
+			return ret;
+		flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN);
+		retries++;
+
+		if ((retries & 0xf) == 0)
+			usleep_range(1000, 2000);
+
+	} while (flag != expected_val);
+
+	return 0;
+}
+
+static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address,
+			     u32 data)
+{
+	int ret;
+
+	if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+			 VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+		return -EINVAL;
+
+	/* Set flag to 0x1 */
+	address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1);
+	ret = vsc_write(dev, VSC_DATA_OFFSET, data);
+	if (ret)
+		goto out;
+
+	ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+	if (ret)
+		goto out;
+
+	/* Wait for the flag to be cleared */
+	ret = mlx5_vsc_wait_on_flag(dev, 0);
+
+out:
+	return ret;
+}
+
+static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address,
+			    u32 *data)
+{
+	int ret;
+
+	if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+			 VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+		return -EINVAL;
+
+	ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+	if (ret)
+		goto out;
+
+	ret = mlx5_vsc_wait_on_flag(dev, 1);
+	if (ret)
+		goto out;
+
+	ret = vsc_read(dev, VSC_DATA_OFFSET, data);
+out:
+	return ret;
+}
+
+static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev,
+				 unsigned int read_addr,
+				 unsigned int *next_read_addr,
+				 u32 *data)
+{
+	int ret;
+
+	ret = mlx5_vsc_gw_read(dev, read_addr, data);
+	if (ret)
+		goto out;
+
+	ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr);
+	if (ret)
+		goto out;
+
+	*next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS,
+				       VSC_ADDR_BIT_LEN);
+
+	if (*next_read_addr <= read_addr)
+		ret = -EINVAL;
+out:
+	return ret;
+}
+
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+				int length)
+{
+	unsigned int next_read_addr = 0;
+	unsigned int read_addr = 0;
+
+	while (read_addr < length) {
+		if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr,
+					  &data[(read_addr >> 2)]))
+			return read_addr;
+
+		read_addr = next_read_addr;
+	}
+	return length;
+}
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+			   enum mlx5_vsc_state state)
+{
+	u32 data, id = 0;
+	int ret;
+
+	ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL);
+	if (ret) {
+		mlx5_core_warn(dev, "Failed to set gw space %d\n", ret);
+		return ret;
+	}
+
+	if (state == MLX5_VSC_LOCK) {
+		/* Get a unique ID based on the counter */
+		ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id);
+		if (ret)
+			return ret;
+	}
+
+	/* Try to modify lock */
+	ret = mlx5_vsc_gw_write(dev, space, id);
+	if (ret)
+		return ret;
+
+	/* Verify lock was modified */
+	ret = mlx5_vsc_gw_read(dev, space, &data);
+	if (ret)
+		return -EINVAL;
+
+	if (data != id)
+		return -EBUSY;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
new file mode 100644
index 0000000000..64272a6d77
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#ifndef __MLX5_PCI_VSC_H__
+#define __MLX5_PCI_VSC_H__
+
+enum mlx5_vsc_state {
+	MLX5_VSC_UNLOCK,
+	MLX5_VSC_LOCK,
+};
+
+enum {
+	MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+			  u32 *ret_space_size);
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+				int length);
+
+static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev)
+{
+	return !!dev->vsc_addr;
+}
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+			   enum mlx5_vsc_state state);
+
+#endif /* __MLX5_PCI_VSC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
new file mode 100644
index 0000000000..4571c56ec3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include "mlx5_core.h"
+#include "lib/port_tun.h"
+
+struct mlx5_port_tun_entropy_flags {
+	bool force_supported, force_enabled;
+	bool calc_supported, calc_enabled;
+	bool gre_calc_supported, gre_calc_enabled;
+};
+
+static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev,
+					struct mlx5_port_tun_entropy_flags *entropy_flags)
+{
+	u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+	/* Default values for FW which do not support MLX5_REG_PCMR */
+	entropy_flags->force_supported = false;
+	entropy_flags->calc_supported = false;
+	entropy_flags->gre_calc_supported = false;
+	entropy_flags->force_enabled = false;
+	entropy_flags->calc_enabled = true;
+	entropy_flags->gre_calc_enabled = true;
+
+	if (!MLX5_CAP_GEN(mdev, ports_check))
+		return;
+
+	if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+		return;
+
+	entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap));
+	entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap));
+	entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap));
+	entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force));
+	entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc));
+	entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc));
+}
+
+static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable,
+					  u8 force)
+{
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+	int err;
+
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
+	MLX5_SET(pcmr_reg, in, local_port, 1);
+	MLX5_SET(pcmr_reg, in, entropy_force, force);
+	MLX5_SET(pcmr_reg, in, entropy_calc, enable);
+	return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev,
+					      u8 enable, u8 force)
+{
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+	int err;
+
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
+	MLX5_SET(pcmr_reg, in, local_port, 1);
+	MLX5_SET(pcmr_reg, in, entropy_force, force);
+	MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable);
+	return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+				struct mlx5_core_dev *mdev)
+{
+	struct mlx5_port_tun_entropy_flags entropy_flags;
+
+	tun_entropy->mdev = mdev;
+	mutex_init(&tun_entropy->lock);
+	mlx5_query_port_tun_entropy(mdev, &entropy_flags);
+	tun_entropy->num_enabling_entries = 0;
+	tun_entropy->num_disabling_entries = 0;
+	tun_entropy->enabled = entropy_flags.calc_supported ?
+			       entropy_flags.calc_enabled : true;
+}
+
+static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
+			    int reformat_type, bool enable)
+{
+	struct mlx5_port_tun_entropy_flags entropy_flags;
+	int err;
+
+	mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags);
+	/* Tunnel entropy calculation may be controlled either on port basis
+	 * for all tunneling protocols or specifically for GRE protocol.
+	 * Prioritize GRE protocol control (if capable) over global port
+	 * configuration.
+	 */
+	if (entropy_flags.gre_calc_supported &&
+	    reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+		if (!entropy_flags.force_supported)
+			return 0;
+		err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev,
+							 enable, !enable);
+		if (err)
+			return err;
+	} else if (entropy_flags.calc_supported) {
+		/* Other applications may change the global FW entropy
+		 * calculations settings. Check that the current entropy value
+		 * is the negative of the updated value.
+		 */
+		if (entropy_flags.force_enabled &&
+		    enable == entropy_flags.calc_enabled) {
+			mlx5_core_warn(tun_entropy->mdev,
+				       "Unexpected entropy calc setting - expected %d",
+				       !entropy_flags.calc_enabled);
+			return -EOPNOTSUPP;
+		}
+		/* GRE requires disabling entropy calculation. if there are
+		 * enabling entries (i.e VXLAN) we cannot turn it off for them,
+		 * thus fail.
+		 */
+		if (tun_entropy->num_enabling_entries)
+			return -EOPNOTSUPP;
+		err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable,
+						     entropy_flags.force_supported);
+		if (err)
+			return err;
+		tun_entropy->enabled = enable;
+		/* if we turn on the entropy we don't need to force it anymore */
+		if (entropy_flags.force_supported && enable) {
+			err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* the function manages the refcount for enabling/disabling tunnel types.
+ * the return value indicates if the inc is successful or not, depending on
+ * entropy capabilities and configuration.
+ */
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+				  int reformat_type)
+{
+	int err = -EOPNOTSUPP;
+
+	mutex_lock(&tun_entropy->lock);
+	if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN ||
+	     reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) &&
+	    tun_entropy->enabled) {
+		/* in case entropy calculation is enabled for all tunneling
+		 * types, it is ok for VXLAN, so approve.
+		 * otherwise keep the error default.
+		 */
+		tun_entropy->num_enabling_entries++;
+		err = 0;
+	} else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+		/* turn off the entropy only for the first GRE rule.
+		 * for the next rules the entropy was already disabled
+		 * successfully.
+		 */
+		if (tun_entropy->num_disabling_entries == 0)
+			err = mlx5_set_entropy(tun_entropy, reformat_type, 0);
+		else
+			err = 0;
+		if (!err)
+			tun_entropy->num_disabling_entries++;
+	}
+	mutex_unlock(&tun_entropy->lock);
+
+	return err;
+}
+
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+				   int reformat_type)
+{
+	mutex_lock(&tun_entropy->lock);
+	if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN)
+		tun_entropy->num_enabling_entries--;
+	else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE &&
+		 --tun_entropy->num_disabling_entries == 0)
+		mlx5_set_entropy(tun_entropy, reformat_type, 1);
+	mutex_unlock(&tun_entropy->lock);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
new file mode 100644
index 0000000000..54c42a8870
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_PORT_TUN_H__
+#define __MLX5_PORT_TUN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_tun_entropy {
+	struct mlx5_core_dev *mdev;
+	u32 num_enabling_entries;
+	u32 num_disabling_entries;
+	u8  enabled;
+	struct mutex lock;	/* lock the entropy fields */
+};
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+				struct mlx5_core_dev *mdev);
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+				  int reformat_type);
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+				   int reformat_type);
+
+#endif /* __MLX5_PORT_TUN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
new file mode 100644
index 0000000000..84e5683861
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __LIB_MLX5_SF_H__
+#define __LIB_MLX5_SF_H__
+
+#include <linux/mlx5/driver.h>
+
+static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_GEN(dev, sf_base_id);
+}
+
+#ifdef CONFIG_MLX5_SF
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_GEN(dev, sf);
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+	if (!mlx5_sf_supported(dev))
+		return 0;
+	if (MLX5_CAP_GEN(dev, max_num_sf))
+		return MLX5_CAP_GEN(dev, max_num_sf);
+	else
+		return 1 << MLX5_CAP_GEN(dev, log_max_sf);
+}
+
+#else
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+	return false;
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+	return 0;
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
new file mode 100644
index 0000000000..9b8c051ccf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+#include "smfs.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec)
+{
+	struct mlx5dr_match_parameters matcher_mask = {};
+
+	matcher_mask.match_buf = (u64 *)&spec->match_criteria;
+	matcher_mask.match_sz = DR_SZ_MATCH_PARAM;
+
+	return mlx5dr_matcher_create(table, priority, spec->match_criteria_enable, &matcher_mask);
+}
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher)
+{
+	mlx5dr_matcher_destroy(matcher);
+}
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft)
+{
+	return mlx5dr_table_get_from_fs_ft(ft);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table)
+{
+	return mlx5dr_action_create_dest_table(table);
+}
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id)
+{
+	return mlx5dr_action_create_flow_counter(counter_id);
+}
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action)
+{
+	mlx5dr_action_destroy(action);
+}
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+		      size_t num_actions, struct mlx5dr_action *actions[],
+		      u32 flow_source)
+{
+	struct mlx5dr_match_parameters value = {};
+
+	value.match_buf = (u64 *)spec->match_value;
+	value.match_sz = DR_SZ_MATCH_PARAM;
+
+	return mlx5dr_rule_create(matcher, &value, num_actions, actions, flow_source);
+}
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule)
+{
+	mlx5dr_rule_destroy(rule);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
new file mode 100644
index 0000000000..452d0df339
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_LIB_SMFS_H__
+#define __MLX5_LIB_SMFS_H__
+
+#include "steering/mlx5dr.h"
+#include "steering/dr_types.h"
+
+struct mlx5dr_matcher *
+mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec);
+
+void
+mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher);
+
+struct mlx5dr_table *
+mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table);
+
+struct mlx5dr_action *
+mlx5_smfs_action_create_flow_counter(u32 counter_id);
+
+void
+mlx5_smfs_action_destroy(struct mlx5dr_action *action);
+
+struct mlx5dr_rule *
+mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec,
+		      size_t num_actions, struct mlx5dr_action *actions[],
+		      u32 flow_source);
+
+void
+mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule);
+
+#endif /* __MLX5_LIB_SMFS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
new file mode 100644
index 0000000000..e223e0e464
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/driver.h>
+#include "lib/tout.h"
+
+struct mlx5_timeouts {
+	u64 to[MAX_TIMEOUT_TYPES];
+};
+
+static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
+	[MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000,
+	[MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000,
+	[MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
+	[MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
+	[MLX5_TO_FW_INIT_MS] = 2000,
+	[MLX5_TO_CMD_MS] = 60000,
+	[MLX5_TO_PCI_TOGGLE_MS] =  2000,
+	[MLX5_TO_HEALTH_POLL_INTERVAL_MS] =  2000,
+	[MLX5_TO_FULL_CRDUMP_MS] = 60000,
+	[MLX5_TO_FW_RESET_MS] = 60000,
+	[MLX5_TO_FLUSH_ON_ERROR_MS] = 2000,
+	[MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000,
+	[MLX5_TO_TEARDOWN_MS] = 3000,
+	[MLX5_TO_FSM_REACTIVATE_MS] = 5000,
+	[MLX5_TO_RECLAIM_PAGES_MS] = 5000,
+	[MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000,
+	[MLX5_TO_RESET_UNLOAD_MS] = 300000
+};
+
+static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type)
+{
+	dev->timeouts->to[type] = val;
+}
+
+int mlx5_tout_init(struct mlx5_core_dev *dev)
+{
+	int i;
+
+	dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL);
+	if (!dev->timeouts)
+		return -ENOMEM;
+
+	for (i = 0; i < MAX_TIMEOUT_TYPES; i++)
+		tout_set(dev, tout_def_sw_val[i], i);
+
+	return 0;
+}
+
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev)
+{
+	kfree(dev->timeouts);
+}
+
+/* Time register consists of two fields to_multiplier(time out multiplier)
+ * and to_value(time out value). to_value is the quantity of the time units and
+ * to_multiplier is the type and should be one off these four values.
+ * 0x0: millisecond
+ * 0x1: seconds
+ * 0x2: minutes
+ * 0x3: hours
+ * this function converts the time stored in the two register fields into
+ * millisecond.
+ */
+static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val)
+{
+	u64 msec = to_val;
+
+	to_mul &= 0x3;
+	/* convert hours/minutes/seconds to miliseconds */
+	if (to_mul)
+		msec *= 1000 * int_pow(60, to_mul - 1);
+
+	return msec;
+}
+
+static u64 tout_convert_iseg_to_ms(u32 iseg_to)
+{
+	return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff);
+}
+
+static bool tout_is_supported(struct mlx5_core_dev *dev)
+{
+	return !!ioread32be(&dev->iseg->cmd_q_init_to);
+}
+
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev)
+{
+	u32 to;
+
+	if (!tout_is_supported(dev))
+		return;
+
+	to = ioread32be(&dev->iseg->cmd_q_init_to);
+	tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS);
+
+	to = ioread32be(&dev->iseg->cmd_exec_to);
+	tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS);
+}
+
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type)
+{
+	return dev->timeouts->to[type];
+}
+
+#define MLX5_TIMEOUT_QUERY(fld, reg_out) \
+	({ \
+	struct mlx5_ifc_default_timeout_bits *time_field; \
+	u32 to_multi, to_value; \
+	u64 to_val_ms; \
+	\
+	time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \
+	to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \
+	to_value = MLX5_GET(default_timeout, time_field, to_value); \
+	to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \
+	to_val_ms; \
+	})
+
+#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \
+	({ \
+	u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \
+	if (fw_to) \
+		tout_set(dev, fw_to + (to_extra), to_type); \
+	fw_to; \
+	})
+
+static int tout_query_dtor(struct mlx5_core_dev *dev)
+{
+	u64 pcie_toggle_to_val, tear_down_to_val;
+	u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {};
+	int err;
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0);
+	if (err)
+		return err;
+
+	pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0);
+	MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val);
+
+	tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0);
+	MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS,
+			  tear_down_to_val);
+
+	MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0);
+	MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0);
+	MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0);
+	MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0);
+	MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0);
+	MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0);
+	MLX5_TIMEOUT_FILL(reset_unload_to, out, dev, MLX5_TO_RESET_UNLOAD_MS, 0);
+
+	return 0;
+}
+
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev)
+{
+	if (tout_is_supported(dev))
+		return tout_query_dtor(dev);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
new file mode 100644
index 0000000000..99e0a05526
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef MLX5_TIMEOUTS_H
+#define MLX5_TIMEOUTS_H
+
+enum mlx5_timeouts_types {
+	/* pre init timeouts (not read from FW) */
+	MLX5_TO_FW_PRE_INIT_TIMEOUT_MS,
+	MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS,
+	MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS,
+	MLX5_TO_FW_PRE_INIT_WAIT_MS,
+
+	/* init segment timeouts */
+	MLX5_TO_FW_INIT_MS,
+	MLX5_TO_CMD_MS,
+
+	/* DTOR timeouts */
+	MLX5_TO_PCI_TOGGLE_MS,
+	MLX5_TO_HEALTH_POLL_INTERVAL_MS,
+	MLX5_TO_FULL_CRDUMP_MS,
+	MLX5_TO_FW_RESET_MS,
+	MLX5_TO_FLUSH_ON_ERROR_MS,
+	MLX5_TO_PCI_SYNC_UPDATE_MS,
+	MLX5_TO_TEARDOWN_MS,
+	MLX5_TO_FSM_REACTIVATE_MS,
+	MLX5_TO_RECLAIM_PAGES_MS,
+	MLX5_TO_RECLAIM_VFS_PAGES_MS,
+	MLX5_TO_RESET_UNLOAD_MS,
+
+	MAX_TIMEOUT_TYPES
+};
+
+struct mlx5_core_dev;
+int mlx5_tout_init(struct mlx5_core_dev *dev);
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
+
+#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
+
+# endif /* MLX5_TIMEOUTS_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
new file mode 100644
index 0000000000..d55e15c1f3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <linux/mlx5/driver.h>
+#include <net/vxlan.h>
+#include "mlx5_core.h"
+#include "vxlan.h"
+
+struct mlx5_vxlan {
+	struct mlx5_core_dev		*mdev;
+	/* max_num_ports is usually 4, 16 buckets is more than enough */
+	DECLARE_HASHTABLE(htable, 4);
+	struct mutex                    sync_lock; /* sync add/del port HW operations */
+};
+
+struct mlx5_vxlan_port {
+	struct hlist_node hlist;
+	u16 udp_port;
+};
+
+static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {};
+
+	MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
+		 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
+	MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+	return mlx5_cmd_exec_in(mdev, add_vxlan_udp_dport, in);
+}
+
+static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {};
+
+	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
+		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+	MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+	return mlx5_cmd_exec_in(mdev, delete_vxlan_udp_dport, in);
+}
+
+bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	bool found = false;
+
+	if (!mlx5_vxlan_allowed(vxlan))
+		return NULL;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(vxlan->htable, vxlanp, hlist, port)
+		if (vxlanp->udp_port == port) {
+			found = true;
+			break;
+		}
+	rcu_read_unlock();
+
+	return found;
+}
+
+static struct mlx5_vxlan_port *vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+
+	hash_for_each_possible(vxlan->htable, vxlanp, hlist, port)
+		if (vxlanp->udp_port == port)
+			return vxlanp;
+	return NULL;
+}
+
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	int ret;
+
+	vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
+	if (!vxlanp)
+		return -ENOMEM;
+	vxlanp->udp_port = port;
+
+	ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+	if (ret) {
+		kfree(vxlanp);
+		return ret;
+	}
+
+	mutex_lock(&vxlan->sync_lock);
+	hash_add_rcu(vxlan->htable, &vxlanp->hlist, port);
+	mutex_unlock(&vxlan->sync_lock);
+
+	return 0;
+}
+
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	int ret = 0;
+
+	mutex_lock(&vxlan->sync_lock);
+
+	vxlanp = vxlan_lookup_port(vxlan, port);
+	if (WARN_ON(!vxlanp)) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	hash_del_rcu(&vxlanp->hlist);
+	synchronize_rcu();
+	mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+	kfree(vxlanp);
+
+out_unlock:
+	mutex_unlock(&vxlan->sync_lock);
+	return ret;
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_vxlan *vxlan;
+
+	if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
+		return ERR_PTR(-ENOTSUPP);
+
+	vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
+	if (!vxlan)
+		return ERR_PTR(-ENOMEM);
+
+	vxlan->mdev = mdev;
+	mutex_init(&vxlan->sync_lock);
+	hash_init(vxlan->htable);
+
+	/* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */
+	mlx5_vxlan_add_port(vxlan, IANA_VXLAN_UDP_PORT);
+
+	return vxlan;
+}
+
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+	if (!mlx5_vxlan_allowed(vxlan))
+		return;
+
+	mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT);
+	WARN_ON(!hash_empty(vxlan->htable));
+
+	kfree(vxlan);
+}
+
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	struct hlist_node *tmp;
+	int bkt;
+
+	if (!mlx5_vxlan_allowed(vxlan))
+		return;
+
+	hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
+		/* Don't delete default UDP port added by the HW.
+		 * Remove only user configured ports
+		 */
+		if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT)
+			continue;
+		mlx5_vxlan_del_port(vxlan, vxlanp->udp_port);
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
new file mode 100644
index 0000000000..34ef662da3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_VXLAN_H__
+#define __MLX5_VXLAN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vxlan;
+struct mlx5_vxlan_port;
+
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
+static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
+{
+	/* not allowed reason is encoded in vxlan pointer as error,
+	 * on mlx5_vxlan_create
+	 */
+	return !IS_ERR_OR_NULL(vxlan);
+}
+
+#if IS_ENABLED(CONFIG_VXLAN)
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
+bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan);
+#else
+static inline struct mlx5_vxlan*
+mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
+static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; }
+static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; }
+#endif
+
+#endif /* __MLX5_VXLAN_H__ */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ethernet/mellanox/mlx5/core/lib
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip