summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
commit76cb841cb886eef6b3bee341a2266c76578724ad (patch)
treef5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /drivers/net/ethernet/mellanox/mlx5
parentInitial commit. (diff)
downloadlinux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz
linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip
Adding upstream version 4.19.249.upstream/4.19.249
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig120
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c109
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h88
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c80
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c311
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c1919
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c221
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c593
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c498
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h286
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c950
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h175
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h979
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h212
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c235
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c357
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c325
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h80
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c547
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h161
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c401
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c133
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h106
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c383
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c710
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c194
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c1206
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dim.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c1701
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c1576
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c844
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5221
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c1295
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h169
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c1470
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c355
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c1404
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h284
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c3063
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c730
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c144
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c991
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c2219
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h319
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c1368
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c238
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h91
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c1047
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c327
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h114
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c1533
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h142
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h212
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c622
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c768
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h101
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c2720
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h286
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c360
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c527
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c401
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c258
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c697
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h125
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c357
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c691
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c616
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c158
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c202
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h95
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c230
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mad.c75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1721
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mcg.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h217
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c193
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c596
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pd.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c1116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c636
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rl.c288
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c282
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c692
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c621
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/uar.c329
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c1203
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h261
114 files changed, 57917 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
new file mode 100644
index 000000000..b7e3b8902
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -0,0 +1,120 @@
+#
+# Mellanox driver configuration
+#
+
+config MLX5_CORE
+ tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
+ depends on MAY_USE_DEVLINK
+ depends on PCI
+ imply PTP_1588_CLOCK
+ imply VXLAN
+ imply MLXFW
+ default n
+ ---help---
+ Core driver for low level functionality of the ConnectX-4 and
+ Connect-IB cards by Mellanox Technologies.
+
+config MLX5_ACCEL
+ bool
+
+config MLX5_FPGA
+ bool "Mellanox Technologies Innova support"
+ depends on MLX5_CORE
+ select MLX5_ACCEL
+ ---help---
+ Build support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards are comprised of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
+config MLX5_CORE_EN
+ bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
+ depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
+ depends on IPV6=y || IPV6=n || MLX5_CORE=m
+ select PAGE_POOL
+ default n
+ ---help---
+ Ethernet support in Mellanox Technologies ConnectX-4 NIC.
+
+config MLX5_EN_ARFS
+ bool "Mellanox MLX5 ethernet accelerated receive flow steering (ARFS) support"
+ depends on MLX5_CORE_EN && RFS_ACCEL
+ default y
+ ---help---
+ Mellanox MLX5 ethernet hardware-accelerated receive flow steering support,
+ Enables ethernet netdevice arfs support and ntuple filtering.
+
+config MLX5_EN_RXNFC
+ bool "Mellanox MLX5 ethernet rx nfc flow steering support"
+ depends on MLX5_CORE_EN
+ default y
+ ---help---
+ Mellanox MLX5 ethernet rx nfc flow steering support
+ Enables ethtool receive network flow classification, which allows user defined
+ flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc
+ API.
+
+config MLX5_MPFS
+ bool "Mellanox Technologies MLX5 MPFS support"
+ depends on MLX5_CORE_EN
+ default y
+ ---help---
+ Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
+ support in ConnectX NIC. MPFs is required for when multi-PF configuration
+ is enabled to allow passing user configured unicast MAC addresses to the
+ requesting PF.
+
+config MLX5_ESWITCH
+ bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
+ depends on MLX5_CORE_EN && NET_SWITCHDEV
+ default y
+ ---help---
+ Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
+ E-Switch provides internal SRIOV packet steering and switching for the
+ enabled VFs and PF in two available modes:
+ Legacy SRIOV mode (L2 mac vlan steering based).
+ Switchdev mode (eswitch offloads).
+
+config MLX5_CORE_EN_DCB
+ bool "Data Center Bridging (DCB) Support"
+ default y
+ depends on MLX5_CORE_EN && DCB
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
+ If set to N, will not be able to configure QoS and ratelimit attributes.
+ This flag is depended on the kernel's DCB support.
+
+ If unsure, set to Y
+
+config MLX5_CORE_IPOIB
+ bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support"
+ depends on MLX5_CORE_EN
+ default n
+ ---help---
+ MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_IPSEC
+ bool "IPSec XFRM cryptography-offload accelaration"
+ depends on MLX5_ACCEL
+ depends on MLX5_CORE_EN
+ depends on XFRM_OFFLOAD
+ depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ default n
+ ---help---
+ Build support for IPsec cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
+
+config MLX5_EN_TLS
+ bool "TLS cryptography-offload accelaration"
+ depends on MLX5_CORE_EN
+ depends on TLS_DEVICE
+ depends on TLS=y || MLX5_CORE=m
+ depends on MLX5_ACCEL
+ default n
+ ---help---
+ Build support for TLS cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
new file mode 100644
index 000000000..d324a3884
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox 5th generation network adapters
+# (ConnectX series) core & netdev driver
+#
+
+subdir-ccflags-y += -I$(src)
+
+obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
+
+#
+# mlx5 core basic
+#
+mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
+ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
+ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+ fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
+ diag/fs_tracepoint.o diag/fw_tracer.o
+
+#
+# Netdev basic
+#
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
+ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
+ en_selftest.o en/port.o
+
+#
+# Netdev extra
+#
+mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
+mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o
+
+#
+# Core extra
+#
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o
+mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
+mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
+mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+
+#
+# Ipoib netdev
+#
+mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o
+
+#
+# Accelerations & FPGA
+#
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
+
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
+ fpga/ipsec.o fpga/tls.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
+ en_accel/ipsec_stats.o
+
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o
+
+CFLAGS_tracepoint.o := -I$(src)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
new file mode 100644
index 000000000..c13260467
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
@@ -0,0 +1,37 @@
+#ifndef __MLX5E_ACCEL_H__
+#define __MLX5E_ACCEL_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en.h"
+
+static inline bool is_metadata_hdr_valid(struct sk_buff *skb)
+{
+ __be16 *ethtype;
+
+ if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN))
+ return false;
+ ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
+ if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
+ return false;
+ return true;
+}
+
+static inline void remove_metadata_hdr(struct sk_buff *skb)
+{
+ struct ethhdr *old_eth;
+ struct ethhdr *new_eth;
+
+ /* Remove the metadata from the buffer */
+ old_eth = (struct ethhdr *)skb->data;
+ new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
+ memmove(new_eth, old_eth, 2 * ETH_ALEN);
+ /* Ethertype is already in its new place */
+ skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
+}
+
+#endif /* CONFIG_MLX5_ACCEL */
+
+#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
new file mode 100644
index 000000000..9f1b19397
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/ipsec.h"
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_device_caps(mdev);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_counters_count(mdev);
+}
+
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int count)
+{
+ return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
+}
+
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
+ spi, is_ipv6);
+}
+
+void mlx5_accel_esp_free_hw_context(void *context)
+{
+ mlx5_fpga_ipsec_delete_sa_ctx(context);
+}
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_init(mdev);
+}
+
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+ mlx5_fpga_ipsec_cleanup(mdev);
+}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_accel_esp_xfrm *xfrm;
+
+ xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags);
+ if (IS_ERR(xfrm))
+ return xfrm;
+
+ xfrm->mdev = mdev;
+ return xfrm;
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
+
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ mlx5_fpga_esp_destroy_xfrm(xfrm);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
+
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
new file mode 100644
index 000000000..024dbd22a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_IPSEC_H__
+#define __MLX5_ACCEL_IPSEC_H__
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/accel.h>
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int count);
+
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_accel_esp_free_hw_context(void *context);
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+#define MLX5_IPSEC_DEV(mdev) false
+
+static inline void *
+mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_accel_esp_free_hw_context(void *context)
+{
+}
+
+static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif
+
+#endif /* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
new file mode 100644
index 000000000..da7bd2636
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/tls.h"
+#include "mlx5_core.h"
+#include "fpga/tls.h"
+
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx)
+{
+ return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
+ start_offload_tcp_sn, p_swid,
+ direction_sx);
+}
+
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ bool direction_sx)
+{
+ mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
+}
+
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn)
+{
+ return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
+}
+
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_is_tls_device(mdev);
+}
+
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_tls_device_caps(mdev);
+}
+
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_tls_init(mdev);
+}
+
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+ mlx5_fpga_tls_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
new file mode 100644
index 000000000..def4093eb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_TLS_H__
+#define __MLX5_ACCEL_TLS_H__
+
+#include <linux/mlx5/driver.h>
+#include <linux/tls.h>
+
+#ifdef CONFIG_MLX5_ACCEL
+
+enum {
+ MLX5_ACCEL_TLS_TX = BIT(0),
+ MLX5_ACCEL_TLS_RX = BIT(1),
+ MLX5_ACCEL_TLS_V12 = BIT(2),
+ MLX5_ACCEL_TLS_V13 = BIT(3),
+ MLX5_ACCEL_TLS_LRO = BIT(4),
+ MLX5_ACCEL_TLS_IPV6 = BIT(5),
+ MLX5_ACCEL_TLS_AES_GCM128 = BIT(30),
+ MLX5_ACCEL_TLS_AES_GCM256 = BIT(31),
+};
+
+struct mlx5_ifc_tls_flow_bits {
+ u8 src_port[0x10];
+ u8 dst_port[0x10];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+ u8 ipv6[0x1];
+ u8 direction_sx[0x1];
+ u8 reserved_at_2[0x1e];
+};
+
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx);
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ bool direction_sx);
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn);
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline int
+mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx) { return -ENOTSUPP; }
+static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ bool direction_sx) { }
+static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
+ u32 seq, u64 rcd_sn) { return 0; }
+static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
+static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
+
+#endif
+
+#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
new file mode 100644
index 000000000..456f30007
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+
+struct mlx5_db_pgdir {
+ struct list_head list;
+ unsigned long *bitmap;
+ __be32 *db_page;
+ dma_addr_t db_dma;
+};
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0.
+ */
+
+static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+ size_t size, dma_addr_t *dma_handle,
+ int node)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int original_node;
+ void *cpu_handle;
+
+ mutex_lock(&priv->alloc_mutex);
+ original_node = dev_to_node(&dev->pdev->dev);
+ set_dev_node(&dev->pdev->dev, node);
+ cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size,
+ dma_handle, GFP_KERNEL);
+ set_dev_node(&dev->pdev->dev, original_node);
+ mutex_unlock(&priv->alloc_mutex);
+ return cpu_handle;
+}
+
+int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+ struct mlx5_frag_buf *buf, int node)
+{
+ dma_addr_t t;
+
+ buf->size = size;
+ buf->npages = 1;
+ buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
+
+ buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
+ if (!buf->frags)
+ return -ENOMEM;
+
+ buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
+ &t, node);
+ if (!buf->frags->buf)
+ goto err_out;
+
+ buf->frags->map = t;
+
+ while (t & ((1 << buf->page_shift) - 1)) {
+ --buf->page_shift;
+ buf->npages *= 2;
+ }
+
+ return 0;
+err_out:
+ kfree(buf->frags);
+ return -ENOMEM;
+}
+
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf)
+{
+ return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
+}
+EXPORT_SYMBOL(mlx5_buf_alloc);
+
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+ dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+ buf->frags->map);
+
+ kfree(buf->frags);
+}
+EXPORT_SYMBOL_GPL(mlx5_buf_free);
+
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+ struct mlx5_frag_buf *buf, int node)
+{
+ int i;
+
+ buf->size = size;
+ buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
+ buf->page_shift = PAGE_SHIFT;
+ buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
+ GFP_KERNEL);
+ if (!buf->frags)
+ goto err_out;
+
+ for (i = 0; i < buf->npages; i++) {
+ struct mlx5_buf_list *frag = &buf->frags[i];
+ int frag_sz = min_t(int, size, PAGE_SIZE);
+
+ frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
+ &frag->map, node);
+ if (!frag->buf)
+ goto err_free_buf;
+ if (frag->map & ((1 << buf->page_shift) - 1)) {
+ dma_free_coherent(&dev->pdev->dev, frag_sz,
+ buf->frags[i].buf, buf->frags[i].map);
+ mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
+ &frag->map, buf->page_shift);
+ goto err_free_buf;
+ }
+ size -= frag_sz;
+ }
+
+ return 0;
+
+err_free_buf:
+ while (i--)
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf,
+ buf->frags[i].map);
+ kfree(buf->frags);
+err_out:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
+
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+ int size = buf->size;
+ int i;
+
+ for (i = 0; i < buf->npages; i++) {
+ int frag_sz = min_t(int, size, PAGE_SIZE);
+
+ dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf,
+ buf->frags[i].map);
+ size -= frag_sz;
+ }
+ kfree(buf->frags);
+}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
+
+static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
+ int node)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+ struct mlx5_db_pgdir *pgdir;
+
+ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ if (!pgdir)
+ return NULL;
+
+ pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+
+ if (!pgdir->bitmap) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ bitmap_fill(pgdir->bitmap, db_per_page);
+
+ pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
+ &pgdir->db_dma, node);
+ if (!pgdir->db_page) {
+ kfree(pgdir->bitmap);
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir,
+ struct mlx5_db *db)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+ int offset;
+ int i;
+
+ i = find_first_bit(pgdir->bitmap, db_per_page);
+ if (i >= db_per_page)
+ return -ENOMEM;
+
+ __clear_bit(i, pgdir->bitmap);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ offset = db->index * cache_line_size();
+ db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page);
+ db->dma = pgdir->db_dma + offset;
+
+ db->db[0] = 0;
+ db->db[1] = 0;
+
+ return 0;
+}
+
+int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node)
+{
+ struct mlx5_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&dev->priv.pgdir_mutex);
+
+ list_for_each_entry(pgdir, &dev->priv.pgdir_list, list)
+ if (!mlx5_alloc_db_from_pgdir(pgdir, db))
+ goto out;
+
+ pgdir = mlx5_alloc_db_pgdir(dev, node);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &dev->priv.pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db));
+
+out:
+ mutex_unlock(&dev->priv.pgdir_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
+
+int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+ return mlx5_db_alloc_node(dev, db, dev->priv.numa_node);
+}
+EXPORT_SYMBOL_GPL(mlx5_db_alloc);
+
+void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+
+ mutex_lock(&dev->priv.pgdir_mutex);
+
+ __set_bit(db->index, db->u.pgdir->bitmap);
+
+ if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) {
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ db->u.pgdir->db_page, db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ kfree(db->u.pgdir->bitmap);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&dev->priv.pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx5_db_free);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+ u64 addr;
+ int i;
+
+ for (i = 0; i < buf->npages; i++) {
+ addr = buf->frags->map + (i << buf->page_shift);
+
+ pas[i] = cpu_to_be64(addr);
+ }
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+ int i;
+
+ for (i = 0; i < buf->npages; i++)
+ pas[i] = cpu_to_be64(buf->frags[i].map);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
new file mode 100644
index 000000000..a68608276
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -0,0 +1,1919 @@
+/*
+ * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/debugfs.h>
+
+#include "mlx5_core.h"
+
+enum {
+ CMD_IF_REV = 5,
+};
+
+enum {
+ CMD_MODE_POLLING,
+ CMD_MODE_EVENTS
+};
+
+enum {
+ MLX5_CMD_DELIVERY_STAT_OK = 0x0,
+ MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1,
+ MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2,
+ MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3,
+ MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4,
+ MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5,
+ MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6,
+ MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7,
+ MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8,
+ MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9,
+ MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
+};
+
+static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+ struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out,
+ void *uout, int uout_size,
+ mlx5_cmd_cbk_t cbk,
+ void *context, int page_queue)
+{
+ gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
+ struct mlx5_cmd_work_ent *ent;
+
+ ent = kzalloc(sizeof(*ent), alloc_flags);
+ if (!ent)
+ return ERR_PTR(-ENOMEM);
+
+ ent->in = in;
+ ent->out = out;
+ ent->uout = uout;
+ ent->uout_size = uout_size;
+ ent->callback = cbk;
+ ent->context = context;
+ ent->cmd = cmd;
+ ent->page_queue = page_queue;
+
+ return ent;
+}
+
+static u8 alloc_token(struct mlx5_cmd *cmd)
+{
+ u8 token;
+
+ spin_lock(&cmd->token_lock);
+ cmd->token++;
+ if (cmd->token == 0)
+ cmd->token++;
+ token = cmd->token;
+ spin_unlock(&cmd->token_lock);
+
+ return token;
+}
+
+static int alloc_ent(struct mlx5_cmd *cmd)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds);
+ if (ret < cmd->max_reg_cmds)
+ clear_bit(ret, &cmd->bitmask);
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+
+ return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
+}
+
+static void free_ent(struct mlx5_cmd *cmd, int idx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ set_bit(idx, &cmd->bitmask);
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+}
+
+static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
+{
+ return cmd->cmd_buf + (idx << cmd->log_stride);
+}
+
+static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg)
+{
+ int size = msg->len;
+ int blen = size - min_t(int, sizeof(msg->first.data), size);
+
+ return DIV_ROUND_UP(blen, MLX5_CMD_DATA_BLOCK_SIZE);
+}
+
+static u8 xor8_buf(void *buf, size_t offset, int len)
+{
+ u8 *ptr = buf;
+ u8 sum = 0;
+ int i;
+ int end = len + offset;
+
+ for (i = offset; i < end; i++)
+ sum ^= ptr[i];
+
+ return sum;
+}
+
+static int verify_block_sig(struct mlx5_cmd_prot_block *block)
+{
+ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+ int xor_len = sizeof(*block) - sizeof(block->data) - 1;
+
+ if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
+ return -EINVAL;
+
+ if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void calc_block_sig(struct mlx5_cmd_prot_block *block)
+{
+ int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
+ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+
+ block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
+ block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
+}
+
+static void calc_chain_sig(struct mlx5_cmd_msg *msg)
+{
+ struct mlx5_cmd_mailbox *next = msg->next;
+ int n = mlx5_calc_cmd_blocks(msg);
+ int i = 0;
+
+ for (i = 0; i < n && next; i++) {
+ calc_block_sig(next->buf);
+ next = next->next;
+ }
+}
+
+static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
+{
+ ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+ if (csum) {
+ calc_chain_sig(ent->in);
+ calc_chain_sig(ent->out);
+ }
+}
+
+static void poll_timeout(struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000);
+ u8 own;
+
+ do {
+ own = READ_ONCE(ent->lay->status_own);
+ if (!(own & CMD_OWNER_HW)) {
+ ent->ret = 0;
+ return;
+ }
+ cond_resched();
+ } while (time_before(jiffies, poll_end));
+
+ ent->ret = -ETIMEDOUT;
+}
+
+static void free_cmd(struct mlx5_cmd_work_ent *ent)
+{
+ kfree(ent);
+}
+
+static int verify_signature(struct mlx5_cmd_work_ent *ent)
+{
+ struct mlx5_cmd_mailbox *next = ent->out->next;
+ int n = mlx5_calc_cmd_blocks(ent->out);
+ int err;
+ u8 sig;
+ int i = 0;
+
+ sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+ if (sig != 0xff)
+ return -EINVAL;
+
+ for (i = 0; i < n && next; i++) {
+ err = verify_block_sig(next->buf);
+ if (err)
+ return err;
+
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static void dump_buf(void *buf, int size, int data_only, int offset)
+{
+ __be32 *p = buf;
+ int i;
+
+ for (i = 0; i < size; i += 16) {
+ pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]),
+ be32_to_cpu(p[1]), be32_to_cpu(p[2]),
+ be32_to_cpu(p[3]));
+ p += 4;
+ offset += 16;
+ }
+ if (!data_only)
+ pr_debug("\n");
+}
+
+static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+ u32 *synd, u8 *status)
+{
+ *synd = 0;
+ *status = 0;
+
+ switch (op) {
+ case MLX5_CMD_OP_TEARDOWN_HCA:
+ case MLX5_CMD_OP_DISABLE_HCA:
+ case MLX5_CMD_OP_MANAGE_PAGES:
+ case MLX5_CMD_OP_DESTROY_MKEY:
+ case MLX5_CMD_OP_DESTROY_EQ:
+ case MLX5_CMD_OP_DESTROY_CQ:
+ case MLX5_CMD_OP_DESTROY_QP:
+ case MLX5_CMD_OP_DESTROY_PSV:
+ case MLX5_CMD_OP_DESTROY_SRQ:
+ case MLX5_CMD_OP_DESTROY_XRC_SRQ:
+ case MLX5_CMD_OP_DESTROY_XRQ:
+ case MLX5_CMD_OP_DESTROY_DCT:
+ case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
+ case MLX5_CMD_OP_DEALLOC_PD:
+ case MLX5_CMD_OP_DEALLOC_UAR:
+ case MLX5_CMD_OP_DETACH_FROM_MCG:
+ case MLX5_CMD_OP_DEALLOC_XRCD:
+ case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_DESTROY_LAG:
+ case MLX5_CMD_OP_DESTROY_VPORT_LAG:
+ case MLX5_CMD_OP_DESTROY_TIR:
+ case MLX5_CMD_OP_DESTROY_SQ:
+ case MLX5_CMD_OP_DESTROY_RQ:
+ case MLX5_CMD_OP_DESTROY_RMP:
+ case MLX5_CMD_OP_DESTROY_TIS:
+ case MLX5_CMD_OP_DESTROY_RQT:
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
+ case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_2ERR_QP:
+ case MLX5_CMD_OP_2RST_QP:
+ case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
+ case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_DESTROY_QP:
+ case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
+ return MLX5_CMD_STAT_OK;
+
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_INIT_HCA:
+ case MLX5_CMD_OP_ENABLE_HCA:
+ case MLX5_CMD_OP_QUERY_PAGES:
+ case MLX5_CMD_OP_SET_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_SET_ISSI:
+ case MLX5_CMD_OP_CREATE_MKEY:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+ case MLX5_CMD_OP_CREATE_EQ:
+ case MLX5_CMD_OP_QUERY_EQ:
+ case MLX5_CMD_OP_GEN_EQE:
+ case MLX5_CMD_OP_CREATE_CQ:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_MODIFY_CQ:
+ case MLX5_CMD_OP_CREATE_QP:
+ case MLX5_CMD_OP_RST2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_SQD_RTS_QP:
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_CREATE_PSV:
+ case MLX5_CMD_OP_CREATE_SRQ:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_ARM_RQ:
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ case MLX5_CMD_OP_CREATE_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_CREATE_DCT:
+ case MLX5_CMD_OP_DRAIN_DCT:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_MODIFY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OP_SET_ROCE_ADDRESS:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
+ case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
+ case MLX5_CMD_OP_ALLOC_PD:
+ case MLX5_CMD_OP_ALLOC_UAR:
+ case MLX5_CMD_OP_CONFIG_INT_MODERATION:
+ case MLX5_CMD_OP_ACCESS_REG:
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+ case MLX5_CMD_OP_MAD_IFC:
+ case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+ case MLX5_CMD_OP_SET_MAD_DEMUX:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_MODIFY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_MODIFY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_CREATE_LAG:
+ case MLX5_CMD_OP_MODIFY_LAG:
+ case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_CREATE_VPORT_LAG:
+ case MLX5_CMD_OP_CREATE_TIR:
+ case MLX5_CMD_OP_MODIFY_TIR:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_CREATE_SQ:
+ case MLX5_CMD_OP_MODIFY_SQ:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_MODIFY_RQ:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_CREATE_RMP:
+ case MLX5_CMD_OP_MODIFY_RMP:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_CREATE_TIS:
+ case MLX5_CMD_OP_MODIFY_TIS:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_CREATE_RQT:
+ case MLX5_CMD_OP_MODIFY_RQT:
+ case MLX5_CMD_OP_QUERY_RQT:
+
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_CREATE_QP:
+ case MLX5_CMD_OP_FPGA_MODIFY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ *status = MLX5_DRIVER_STATUS_ABORTED;
+ *synd = MLX5_DRIVER_SYND;
+ return -EIO;
+ default:
+ mlx5_core_err(dev, "Unknown FW command (%d)\n", op);
+ return -EINVAL;
+ }
+}
+
+const char *mlx5_command_str(int command)
+{
+#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd
+
+ switch (command) {
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP);
+ MLX5_COMMAND_STR_CASE(QUERY_ADAPTER);
+ MLX5_COMMAND_STR_CASE(INIT_HCA);
+ MLX5_COMMAND_STR_CASE(TEARDOWN_HCA);
+ MLX5_COMMAND_STR_CASE(ENABLE_HCA);
+ MLX5_COMMAND_STR_CASE(DISABLE_HCA);
+ MLX5_COMMAND_STR_CASE(QUERY_PAGES);
+ MLX5_COMMAND_STR_CASE(MANAGE_PAGES);
+ MLX5_COMMAND_STR_CASE(SET_HCA_CAP);
+ MLX5_COMMAND_STR_CASE(QUERY_ISSI);
+ MLX5_COMMAND_STR_CASE(SET_ISSI);
+ MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION);
+ MLX5_COMMAND_STR_CASE(CREATE_MKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_MKEY);
+ MLX5_COMMAND_STR_CASE(DESTROY_MKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS);
+ MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME);
+ MLX5_COMMAND_STR_CASE(CREATE_EQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_EQ);
+ MLX5_COMMAND_STR_CASE(QUERY_EQ);
+ MLX5_COMMAND_STR_CASE(GEN_EQE);
+ MLX5_COMMAND_STR_CASE(CREATE_CQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_CQ);
+ MLX5_COMMAND_STR_CASE(QUERY_CQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_CQ);
+ MLX5_COMMAND_STR_CASE(CREATE_QP);
+ MLX5_COMMAND_STR_CASE(DESTROY_QP);
+ MLX5_COMMAND_STR_CASE(RST2INIT_QP);
+ MLX5_COMMAND_STR_CASE(INIT2RTR_QP);
+ MLX5_COMMAND_STR_CASE(RTR2RTS_QP);
+ MLX5_COMMAND_STR_CASE(RTS2RTS_QP);
+ MLX5_COMMAND_STR_CASE(SQERR2RTS_QP);
+ MLX5_COMMAND_STR_CASE(2ERR_QP);
+ MLX5_COMMAND_STR_CASE(2RST_QP);
+ MLX5_COMMAND_STR_CASE(QUERY_QP);
+ MLX5_COMMAND_STR_CASE(SQD_RTS_QP);
+ MLX5_COMMAND_STR_CASE(INIT2INIT_QP);
+ MLX5_COMMAND_STR_CASE(CREATE_PSV);
+ MLX5_COMMAND_STR_CASE(DESTROY_PSV);
+ MLX5_COMMAND_STR_CASE(CREATE_SRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_SRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_SRQ);
+ MLX5_COMMAND_STR_CASE(ARM_RQ);
+ MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(CREATE_DCT);
+ MLX5_COMMAND_STR_CASE(DESTROY_DCT);
+ MLX5_COMMAND_STR_CASE(DRAIN_DCT);
+ MLX5_COMMAND_STR_CASE(QUERY_DCT);
+ MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION);
+ MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE);
+ MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE);
+ MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS);
+ MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV);
+ MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER);
+ MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
+ MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
+ MLX5_COMMAND_STR_CASE(ALLOC_PD);
+ MLX5_COMMAND_STR_CASE(DEALLOC_PD);
+ MLX5_COMMAND_STR_CASE(ALLOC_UAR);
+ MLX5_COMMAND_STR_CASE(DEALLOC_UAR);
+ MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
+ MLX5_COMMAND_STR_CASE(ACCESS_REG);
+ MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
+ MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG);
+ MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
+ MLX5_COMMAND_STR_CASE(MAD_IFC);
+ MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
+ MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX);
+ MLX5_COMMAND_STR_CASE(NOP);
+ MLX5_COMMAND_STR_CASE(ALLOC_XRCD);
+ MLX5_COMMAND_STR_CASE(DEALLOC_XRCD);
+ MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN);
+ MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS);
+ MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS);
+ MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS);
+ MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT);
+ MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT);
+ MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(SET_WOL_ROL);
+ MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL);
+ MLX5_COMMAND_STR_CASE(CREATE_LAG);
+ MLX5_COMMAND_STR_CASE(MODIFY_LAG);
+ MLX5_COMMAND_STR_CASE(QUERY_LAG);
+ MLX5_COMMAND_STR_CASE(DESTROY_LAG);
+ MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG);
+ MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG);
+ MLX5_COMMAND_STR_CASE(CREATE_TIR);
+ MLX5_COMMAND_STR_CASE(MODIFY_TIR);
+ MLX5_COMMAND_STR_CASE(DESTROY_TIR);
+ MLX5_COMMAND_STR_CASE(QUERY_TIR);
+ MLX5_COMMAND_STR_CASE(CREATE_SQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_SQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_SQ);
+ MLX5_COMMAND_STR_CASE(QUERY_SQ);
+ MLX5_COMMAND_STR_CASE(CREATE_RQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_RQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_RQ);
+ MLX5_COMMAND_STR_CASE(QUERY_RQ);
+ MLX5_COMMAND_STR_CASE(CREATE_RMP);
+ MLX5_COMMAND_STR_CASE(MODIFY_RMP);
+ MLX5_COMMAND_STR_CASE(DESTROY_RMP);
+ MLX5_COMMAND_STR_CASE(QUERY_RMP);
+ MLX5_COMMAND_STR_CASE(CREATE_TIS);
+ MLX5_COMMAND_STR_CASE(MODIFY_TIS);
+ MLX5_COMMAND_STR_CASE(DESTROY_TIS);
+ MLX5_COMMAND_STR_CASE(QUERY_TIS);
+ MLX5_COMMAND_STR_CASE(CREATE_RQT);
+ MLX5_COMMAND_STR_CASE(MODIFY_RQT);
+ MLX5_COMMAND_STR_CASE(DESTROY_RQT);
+ MLX5_COMMAND_STR_CASE(QUERY_RQT);
+ MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT);
+ MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+ MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+ MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
+ MLX5_COMMAND_STR_CASE(CREATE_XRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_XRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_XRQ);
+ MLX5_COMMAND_STR_CASE(ARM_XRQ);
+ MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
+ default: return "unknown command opcode";
+ }
+}
+
+static const char *cmd_status_str(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_STAT_OK:
+ return "OK";
+ case MLX5_CMD_STAT_INT_ERR:
+ return "internal error";
+ case MLX5_CMD_STAT_BAD_OP_ERR:
+ return "bad operation";
+ case MLX5_CMD_STAT_BAD_PARAM_ERR:
+ return "bad parameter";
+ case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+ return "bad system state";
+ case MLX5_CMD_STAT_BAD_RES_ERR:
+ return "bad resource";
+ case MLX5_CMD_STAT_RES_BUSY:
+ return "resource busy";
+ case MLX5_CMD_STAT_LIM_ERR:
+ return "limits exceeded";
+ case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+ return "bad resource state";
+ case MLX5_CMD_STAT_IX_ERR:
+ return "bad index";
+ case MLX5_CMD_STAT_NO_RES_ERR:
+ return "no resources";
+ case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+ return "bad input length";
+ case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+ return "bad output length";
+ case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+ return "bad QP state";
+ case MLX5_CMD_STAT_BAD_PKT_ERR:
+ return "bad packet (discarded)";
+ case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+ return "bad size too many outstanding CQEs";
+ default:
+ return "unknown status";
+ }
+}
+
+static int cmd_status_to_err(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_STAT_OK: return 0;
+ case MLX5_CMD_STAT_INT_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_RES_BUSY: return -EBUSY;
+ case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM;
+ case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_IX_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN;
+ case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL;
+ default: return -EIO;
+ }
+}
+
+struct mlx5_ifc_mbox_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome)
+{
+ *status = MLX5_GET(mbox_out, out, status);
+ *syndrome = MLX5_GET(mbox_out, out, syndrome);
+}
+
+static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
+{
+ u32 syndrome;
+ u8 status;
+ u16 opcode;
+ u16 op_mod;
+ u16 uid;
+
+ mlx5_cmd_mbox_status(out, &status, &syndrome);
+ if (!status)
+ return 0;
+
+ opcode = MLX5_GET(mbox_in, in, opcode);
+ op_mod = MLX5_GET(mbox_in, in, op_mod);
+ uid = MLX5_GET(mbox_in, in, uid);
+
+ if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
+ mlx5_core_err_rl(dev,
+ "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
+ mlx5_command_str(opcode), opcode, op_mod,
+ cmd_status_str(status), status, syndrome);
+ else
+ mlx5_core_dbg(dev,
+ "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
+ mlx5_command_str(opcode),
+ opcode, op_mod,
+ cmd_status_str(status),
+ status,
+ syndrome);
+
+ return cmd_status_to_err(status);
+}
+
+static void dump_command(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_work_ent *ent, int input)
+{
+ struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
+ u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode);
+ struct mlx5_cmd_mailbox *next = msg->next;
+ int n = mlx5_calc_cmd_blocks(msg);
+ int data_only;
+ u32 offset = 0;
+ int dump_len;
+ int i;
+
+ data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
+
+ if (data_only)
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
+ "dump command data %s(0x%x) %s\n",
+ mlx5_command_str(op), op,
+ input ? "INPUT" : "OUTPUT");
+ else
+ mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
+ mlx5_command_str(op), op,
+ input ? "INPUT" : "OUTPUT");
+
+ if (data_only) {
+ if (input) {
+ dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
+ offset += sizeof(ent->lay->in);
+ } else {
+ dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
+ offset += sizeof(ent->lay->out);
+ }
+ } else {
+ dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
+ offset += sizeof(*ent->lay);
+ }
+
+ for (i = 0; i < n && next; i++) {
+ if (data_only) {
+ dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
+ dump_buf(next->buf, dump_len, 1, offset);
+ offset += MLX5_CMD_DATA_BLOCK_SIZE;
+ } else {
+ mlx5_core_dbg(dev, "command block:\n");
+ dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
+ offset += sizeof(struct mlx5_cmd_prot_block);
+ }
+ next = next->next;
+ }
+
+ if (data_only)
+ pr_debug("\n");
+}
+
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+ return MLX5_GET(mbox_in, in->first.data, opcode);
+}
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+ struct mlx5_cmd_work_ent *ent = container_of(dwork,
+ struct mlx5_cmd_work_ent,
+ cb_timeout_work);
+ struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+ cmd);
+
+ ent->ret = -ETIMEDOUT;
+ mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+}
+
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_msg *msg);
+
+static void cmd_work_handler(struct work_struct *work)
+{
+ struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+ struct mlx5_cmd *cmd = ent->cmd;
+ struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+ struct mlx5_cmd_layout *lay;
+ struct semaphore *sem;
+ unsigned long flags;
+ bool poll_cmd = ent->polling;
+ int alloc_ret;
+ int cmd_mode;
+
+ complete(&ent->handling);
+ sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+ down(sem);
+ if (!ent->page_queue) {
+ alloc_ret = alloc_ent(cmd);
+ if (alloc_ret < 0) {
+ mlx5_core_err(dev, "failed to allocate command entry\n");
+ if (ent->callback) {
+ ent->callback(-EAGAIN, ent->context);
+ mlx5_free_cmd_msg(dev, ent->out);
+ free_msg(dev, ent->in);
+ free_cmd(ent);
+ } else {
+ ent->ret = -EAGAIN;
+ complete(&ent->done);
+ }
+ up(sem);
+ return;
+ }
+ ent->idx = alloc_ret;
+ } else {
+ ent->idx = cmd->max_reg_cmds;
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ clear_bit(ent->idx, &cmd->bitmask);
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+ }
+
+ cmd->ent_arr[ent->idx] = ent;
+ lay = get_inst(cmd, ent->idx);
+ ent->lay = lay;
+ memset(lay, 0, sizeof(*lay));
+ memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+ ent->op = be32_to_cpu(lay->in[0]) >> 16;
+ if (ent->in->next)
+ lay->in_ptr = cpu_to_be64(ent->in->next->dma);
+ lay->inlen = cpu_to_be32(ent->in->len);
+ if (ent->out->next)
+ lay->out_ptr = cpu_to_be64(ent->out->next->dma);
+ lay->outlen = cpu_to_be32(ent->out->len);
+ lay->type = MLX5_PCI_CMD_XPORT;
+ lay->token = ent->token;
+ lay->status_own = CMD_OWNER_HW;
+ set_signature(ent, !cmd->checksum_disabled);
+ dump_command(dev, ent, 1);
+ ent->ts1 = ktime_get_ns();
+ cmd_mode = cmd->mode;
+
+ if (ent->callback)
+ schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+ set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
+
+ /* Skip sending command to fw if internal error */
+ if (pci_channel_offline(dev->pdev) ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ u8 status = 0;
+ u32 drv_synd;
+
+ ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
+ MLX5_SET(mbox_out, ent->out, status, status);
+ MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
+
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ /* no doorbell, no need to keep the entry */
+ free_ent(cmd, ent->idx);
+ if (ent->callback)
+ free_cmd(ent);
+ return;
+ }
+
+ /* ring doorbell after the descriptor is valid */
+ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
+ wmb();
+ iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
+ mmiowb();
+ /* if not in polling don't use ent after this point */
+ if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
+ poll_timeout(ent);
+ /* make sure we read the descriptor after ownership is SW */
+ rmb();
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
+ }
+}
+
+static const char *deliv_status_to_str(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_DELIVERY_STAT_OK:
+ return "no errors";
+ case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+ return "signature error";
+ case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+ return "token error";
+ case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+ return "bad block number";
+ case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+ return "output pointer not aligned to block size";
+ case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+ return "input pointer not aligned to block size";
+ case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+ return "firmware internal error";
+ case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+ return "command input length error";
+ case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+ return "command output length error";
+ case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+ return "reserved fields not cleared";
+ case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+ return "bad command descriptor type";
+ default:
+ return "unknown status code";
+ }
+}
+
+static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int err;
+
+ if (!wait_for_completion_timeout(&ent->handling, timeout) &&
+ cancel_work_sync(&ent->work)) {
+ ent->ret = -ECANCELED;
+ goto out_err;
+ }
+ if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
+ wait_for_completion(&ent->done);
+ } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+ ent->ret = -ETIMEDOUT;
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ }
+
+out_err:
+ err = ent->ret;
+
+ if (err == -ETIMEDOUT) {
+ mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
+ } else if (err == -ECANCELED) {
+ mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
+ }
+ mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
+ err, deliv_status_to_str(ent->status), ent->status);
+
+ return err;
+}
+
+/* Notes:
+ * 1. Callback functions may not sleep
+ * 2. page queue commands do not support asynchrous completion
+ */
+static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t callback,
+ void *context, int page_queue, u8 *status,
+ u8 token, bool force_polling)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct mlx5_cmd_work_ent *ent;
+ struct mlx5_cmd_stats *stats;
+ int err = 0;
+ s64 ds;
+ u16 op;
+
+ if (callback && page_queue)
+ return -EINVAL;
+
+ ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
+ page_queue);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ ent->token = token;
+ ent->polling = force_polling;
+
+ init_completion(&ent->handling);
+ if (!callback)
+ init_completion(&ent->done);
+
+ INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
+ INIT_WORK(&ent->work, cmd_work_handler);
+ if (page_queue) {
+ cmd_work_handler(&ent->work);
+ } else if (!queue_work(cmd->wq, &ent->work)) {
+ mlx5_core_warn(dev, "failed to queue work\n");
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ if (callback)
+ goto out;
+
+ err = wait_func(dev, ent);
+ if (err == -ETIMEDOUT)
+ goto out;
+ if (err == -ECANCELED)
+ goto out_free;
+
+ ds = ent->ts2 - ent->ts1;
+ op = MLX5_GET(mbox_in, in->first.data, opcode);
+ if (op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[op];
+ spin_lock_irq(&stats->lock);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irq(&stats->lock);
+ }
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+ "fw exec time for %s is %lld nsec\n",
+ mlx5_command_str(op), ds);
+ *status = ent->status;
+
+out_free:
+ free_cmd(ent);
+out:
+ return err;
+}
+
+static ssize_t dbg_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char lbuf[3];
+ int err;
+
+ if (!dbg->in_msg || !dbg->out_msg)
+ return -ENOMEM;
+
+ if (count < sizeof(lbuf) - 1)
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = 0;
+
+ if (strcmp(lbuf, "go"))
+ return -EINVAL;
+
+ err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen);
+
+ return err ? err : count;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = dbg_write,
+};
+
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
+ u8 token)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_mailbox *next;
+ int copy;
+
+ if (!to || !from)
+ return -ENOMEM;
+
+ copy = min_t(int, size, sizeof(to->first.data));
+ memcpy(to->first.data, from, copy);
+ size -= copy;
+ from += copy;
+
+ next = to->next;
+ while (size) {
+ if (!next) {
+ /* this is a BUG */
+ return -ENOMEM;
+ }
+
+ copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+ block = next->buf;
+ memcpy(block->data, from, copy);
+ from += copy;
+ size -= copy;
+ block->token = token;
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_mailbox *next;
+ int copy;
+
+ if (!to || !from)
+ return -ENOMEM;
+
+ copy = min_t(int, size, sizeof(from->first.data));
+ memcpy(to, from->first.data, copy);
+ size -= copy;
+ to += copy;
+
+ next = from->next;
+ while (size) {
+ if (!next) {
+ /* this is a BUG */
+ return -ENOMEM;
+ }
+
+ copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+ block = next->buf;
+
+ memcpy(to, block->data, copy);
+ to += copy;
+ size -= copy;
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
+ gfp_t flags)
+{
+ struct mlx5_cmd_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof(*mailbox), flags);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags,
+ &mailbox->dma);
+ if (!mailbox->buf) {
+ mlx5_core_dbg(dev, "failed allocation\n");
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+ mailbox->next = NULL;
+
+ return mailbox;
+}
+
+static void free_cmd_box(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_mailbox *mailbox)
+{
+ dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
+
+static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+ gfp_t flags, int size,
+ u8 token)
+{
+ struct mlx5_cmd_mailbox *tmp, *head = NULL;
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_msg *msg;
+ int err;
+ int n;
+ int i;
+
+ msg = kzalloc(sizeof(*msg), flags);
+ if (!msg)
+ return ERR_PTR(-ENOMEM);
+
+ msg->len = size;
+ n = mlx5_calc_cmd_blocks(msg);
+
+ for (i = 0; i < n; i++) {
+ tmp = alloc_cmd_box(dev, flags);
+ if (IS_ERR(tmp)) {
+ mlx5_core_warn(dev, "failed allocating block\n");
+ err = PTR_ERR(tmp);
+ goto err_alloc;
+ }
+
+ block = tmp->buf;
+ tmp->next = head;
+ block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
+ block->block_num = cpu_to_be32(n - i - 1);
+ block->token = token;
+ head = tmp;
+ }
+ msg->next = head;
+ return msg;
+
+err_alloc:
+ while (head) {
+ tmp = head->next;
+ free_cmd_box(dev, head);
+ head = tmp;
+ }
+ kfree(msg);
+
+ return ERR_PTR(err);
+}
+
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_msg *msg)
+{
+ struct mlx5_cmd_mailbox *head = msg->next;
+ struct mlx5_cmd_mailbox *next;
+
+ while (head) {
+ next = head->next;
+ free_cmd_box(dev, head);
+ head = next;
+ }
+ kfree(msg);
+}
+
+static ssize_t data_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ void *ptr;
+
+ if (*pos != 0)
+ return -EINVAL;
+
+ kfree(dbg->in_msg);
+ dbg->in_msg = NULL;
+ dbg->inlen = 0;
+ ptr = memdup_user(buf, count);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ dbg->in_msg = ptr;
+ dbg->inlen = count;
+
+ *pos = count;
+
+ return count;
+}
+
+static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+ if (!dbg->out_msg)
+ return -ENOMEM;
+
+ return simple_read_from_buffer(buf, count, pos, dbg->out_msg,
+ dbg->outlen);
+}
+
+static const struct file_operations dfops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = data_write,
+ .read = data_read,
+};
+
+static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char outlen[8];
+ int err;
+
+ err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
+ if (err < 0)
+ return err;
+
+ return simple_read_from_buffer(buf, count, pos, outlen, err);
+}
+
+static ssize_t outlen_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char outlen_str[8] = {0};
+ int outlen;
+ void *ptr;
+ int err;
+
+ if (*pos != 0 || count > 6)
+ return -EINVAL;
+
+ kfree(dbg->out_msg);
+ dbg->out_msg = NULL;
+ dbg->outlen = 0;
+
+ if (copy_from_user(outlen_str, buf, count))
+ return -EFAULT;
+
+ err = sscanf(outlen_str, "%d", &outlen);
+ if (err < 0)
+ return err;
+
+ ptr = kzalloc(outlen, GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ dbg->out_msg = ptr;
+ dbg->outlen = outlen;
+
+ *pos = count;
+
+ return count;
+}
+
+static const struct file_operations olfops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = outlen_write,
+ .read = outlen_read,
+};
+
+static void set_wqname(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+
+ snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
+ dev_name(&dev->pdev->dev));
+}
+
+static void clean_debug_files(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+ if (!mlx5_debugfs_root)
+ return;
+
+ mlx5_cmdif_debugfs_cleanup(dev);
+ debugfs_remove_recursive(dbg->dbg_root);
+}
+
+static int create_debugfs_files(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ int err = -ENOMEM;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
+ if (!dbg->dbg_root)
+ return err;
+
+ dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root,
+ dev, &dfops);
+ if (!dbg->dbg_in)
+ goto err_dbg;
+
+ dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root,
+ dev, &dfops);
+ if (!dbg->dbg_out)
+ goto err_dbg;
+
+ dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
+ dev, &olfops);
+ if (!dbg->dbg_outlen)
+ goto err_dbg;
+
+ dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
+ &dbg->status);
+ if (!dbg->dbg_status)
+ goto err_dbg;
+
+ dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
+ if (!dbg->dbg_run)
+ goto err_dbg;
+
+ mlx5_cmdif_debugfs_init(dev);
+
+ return 0;
+
+err_dbg:
+ clean_debug_files(dev);
+ return err;
+}
+
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ down(&cmd->sem);
+ down(&cmd->pages_sem);
+
+ cmd->mode = mode;
+
+ up(&cmd->pages_sem);
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ up(&cmd->sem);
+}
+
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+{
+ mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
+
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+ mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+}
+
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+ unsigned long flags;
+
+ if (msg->parent) {
+ spin_lock_irqsave(&msg->parent->lock, flags);
+ list_add_tail(&msg->list, &msg->parent->head);
+ spin_unlock_irqrestore(&msg->parent->lock, flags);
+ } else {
+ mlx5_free_cmd_msg(dev, msg);
+ }
+}
+
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct mlx5_cmd_work_ent *ent;
+ mlx5_cmd_cbk_t callback;
+ void *context;
+ int err;
+ int i;
+ s64 ds;
+ struct mlx5_cmd_stats *stats;
+ unsigned long flags;
+ unsigned long vector;
+
+ /* there can be at most 32 command queues */
+ vector = vec & 0xffffffff;
+ for (i = 0; i < (1 << cmd->log_sz); i++) {
+ if (test_bit(i, &vector)) {
+ struct semaphore *sem;
+
+ ent = cmd->ent_arr[i];
+
+ /* if we already completed the command, ignore it */
+ if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
+ &ent->state)) {
+ /* only real completion can free the cmd slot */
+ if (!forced) {
+ mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+ ent->idx);
+ free_ent(cmd, ent->idx);
+ free_cmd(ent);
+ }
+ continue;
+ }
+
+ if (ent->callback)
+ cancel_delayed_work(&ent->cb_timeout_work);
+ if (ent->page_queue)
+ sem = &cmd->pages_sem;
+ else
+ sem = &cmd->sem;
+ ent->ts2 = ktime_get_ns();
+ memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
+ dump_command(dev, ent, 0);
+ if (!ent->ret) {
+ if (!cmd->checksum_disabled)
+ ent->ret = verify_signature(ent);
+ else
+ ent->ret = 0;
+ if (vec & MLX5_TRIGGERED_CMD_COMP)
+ ent->status = MLX5_DRIVER_STATUS_ABORTED;
+ else
+ ent->status = ent->lay->status_own >> 1;
+
+ mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
+ ent->ret, deliv_status_to_str(ent->status), ent->status);
+ }
+
+ /* only real completion will free the entry slot */
+ if (!forced)
+ free_ent(cmd, ent->idx);
+
+ if (ent->callback) {
+ ds = ent->ts2 - ent->ts1;
+ if (ent->op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[ent->op];
+ spin_lock_irqsave(&stats->lock, flags);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irqrestore(&stats->lock, flags);
+ }
+
+ callback = ent->callback;
+ context = ent->context;
+ err = ent->ret;
+ if (!err) {
+ err = mlx5_copy_from_msg(ent->uout,
+ ent->out,
+ ent->uout_size);
+
+ err = err ? err : mlx5_cmd_check(dev,
+ ent->in->first.data,
+ ent->uout);
+ }
+
+ mlx5_free_cmd_msg(dev, ent->out);
+ free_msg(dev, ent->in);
+
+ err = err ? err : ent->status;
+ if (!forced)
+ free_cmd(ent);
+ callback(err, context);
+ } else {
+ complete(&ent->done);
+ }
+ up(sem);
+ }
+ }
+}
+EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+static int status_to_err(u8 status)
+{
+ return status ? -1 : 0; /* TBD more meaningful codes */
+}
+
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
+ gfp_t gfp)
+{
+ struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+ struct cmd_msg_cache *ch = NULL;
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ if (in_size <= 16)
+ goto cache_miss;
+
+ for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ ch = &cmd->cache[i];
+ if (in_size > ch->max_inbox_size)
+ continue;
+ spin_lock_irq(&ch->lock);
+ if (list_empty(&ch->head)) {
+ spin_unlock_irq(&ch->lock);
+ continue;
+ }
+ msg = list_entry(ch->head.next, typeof(*msg), list);
+ /* For cached lists, we must explicitly state what is
+ * the real size
+ */
+ msg->len = in_size;
+ list_del(&msg->list);
+ spin_unlock_irq(&ch->lock);
+ break;
+ }
+
+ if (!IS_ERR(msg))
+ return msg;
+
+cache_miss:
+ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+ return msg;
+}
+
+static int is_manage_pages(void *in)
+{
+ return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES;
+}
+
+static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size, mlx5_cmd_cbk_t callback, void *context,
+ bool force_polling)
+{
+ struct mlx5_cmd_msg *inb;
+ struct mlx5_cmd_msg *outb;
+ int pages_queue;
+ gfp_t gfp;
+ int err;
+ u8 status = 0;
+ u32 drv_synd;
+ u8 token;
+
+ if (pci_channel_offline(dev->pdev) ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+ err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
+ MLX5_SET(mbox_out, out, status, status);
+ MLX5_SET(mbox_out, out, syndrome, drv_synd);
+ return err;
+ }
+
+ pages_queue = is_manage_pages(in);
+ gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
+
+ inb = alloc_msg(dev, in_size, gfp);
+ if (IS_ERR(inb)) {
+ err = PTR_ERR(inb);
+ return err;
+ }
+
+ token = alloc_token(&dev->cmd);
+
+ err = mlx5_copy_to_msg(inb, in, in_size, token);
+ if (err) {
+ mlx5_core_warn(dev, "err %d\n", err);
+ goto out_in;
+ }
+
+ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
+ if (IS_ERR(outb)) {
+ err = PTR_ERR(outb);
+ goto out_in;
+ }
+
+ err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+ pages_queue, &status, token, force_polling);
+ if (err)
+ goto out_out;
+
+ mlx5_core_dbg(dev, "err %d, status %d\n", err, status);
+ if (status) {
+ err = status_to_err(status);
+ goto out_out;
+ }
+
+ if (!callback)
+ err = mlx5_copy_from_msg(out, outb, out_size);
+
+out_out:
+ if (!callback)
+ mlx5_free_cmd_msg(dev, outb);
+
+out_in:
+ if (!callback)
+ free_msg(dev, inb);
+ return err;
+}
+
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size)
+{
+ int err;
+
+ err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
+ return err ? : mlx5_cmd_check(dev, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec);
+
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size, mlx5_cmd_cbk_t callback,
+ void *context)
+{
+ return cmd_exec(dev, in, in_size, out, out_size, callback, context,
+ false);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_cb);
+
+int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size)
+{
+ int err;
+
+ err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+
+ return err ? : mlx5_cmd_check(dev, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_polling);
+
+static void destroy_msg_cache(struct mlx5_core_dev *dev)
+{
+ struct cmd_msg_cache *ch;
+ struct mlx5_cmd_msg *msg;
+ struct mlx5_cmd_msg *n;
+ int i;
+
+ for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ ch = &dev->cmd.cache[i];
+ list_for_each_entry_safe(msg, n, &ch->head, list) {
+ list_del(&msg->list);
+ mlx5_free_cmd_msg(dev, msg);
+ }
+ }
+}
+
+static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = {
+ 512, 32, 16, 8, 2
+};
+
+static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = {
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 2,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 16,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 256,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 512,
+};
+
+static void create_msg_cache(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct cmd_msg_cache *ch;
+ struct mlx5_cmd_msg *msg;
+ int i;
+ int k;
+
+ /* Initialize and fill the caches with initial entries */
+ for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+ ch = &cmd->cache[k];
+ spin_lock_init(&ch->lock);
+ INIT_LIST_HEAD(&ch->head);
+ ch->num_ent = cmd_cache_num_ent[k];
+ ch->max_inbox_size = cmd_cache_ent_size[k];
+ for (i = 0; i < ch->num_ent; i++) {
+ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN,
+ ch->max_inbox_size, 0);
+ if (IS_ERR(msg))
+ break;
+ msg->parent = ch;
+ list_add_tail(&msg->list, &ch->head);
+ }
+ }
+}
+
+static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+ struct device *ddev = &dev->pdev->dev;
+
+ cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
+ &cmd->alloc_dma, GFP_KERNEL);
+ if (!cmd->cmd_alloc_buf)
+ return -ENOMEM;
+
+ /* make sure it is aligned to 4K */
+ if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) {
+ cmd->cmd_buf = cmd->cmd_alloc_buf;
+ cmd->dma = cmd->alloc_dma;
+ cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE;
+ return 0;
+ }
+
+ dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
+ cmd->alloc_dma);
+ cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev,
+ 2 * MLX5_ADAPTER_PAGE_SIZE - 1,
+ &cmd->alloc_dma, GFP_KERNEL);
+ if (!cmd->cmd_alloc_buf)
+ return -ENOMEM;
+
+ cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE);
+ cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE);
+ cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1;
+ return 0;
+}
+
+static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+ struct device *ddev = &dev->pdev->dev;
+
+ dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf,
+ cmd->alloc_dma);
+}
+
+int mlx5_cmd_init(struct mlx5_core_dev *dev)
+{
+ int size = sizeof(struct mlx5_cmd_prot_block);
+ int align = roundup_pow_of_two(size);
+ struct mlx5_cmd *cmd = &dev->cmd;
+ u32 cmd_h, cmd_l;
+ u16 cmd_if_rev;
+ int err;
+ int i;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd_if_rev = cmdif_rev(dev);
+ if (cmd_if_rev != CMD_IF_REV) {
+ dev_err(&dev->pdev->dev,
+ "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+ CMD_IF_REV, cmd_if_rev);
+ return -EINVAL;
+ }
+
+ cmd->pool = dma_pool_create("mlx5_cmd", &dev->pdev->dev, size, align,
+ 0);
+ if (!cmd->pool)
+ return -ENOMEM;
+
+ err = alloc_cmd_page(dev, cmd);
+ if (err)
+ goto err_free_pool;
+
+ cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
+ cmd->log_sz = cmd_l >> 4 & 0xf;
+ cmd->log_stride = cmd_l & 0xf;
+ if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
+ dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
+ 1 << cmd->log_sz);
+ err = -EINVAL;
+ goto err_free_page;
+ }
+
+ if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
+ dev_err(&dev->pdev->dev, "command queue size overflow\n");
+ err = -EINVAL;
+ goto err_free_page;
+ }
+
+ cmd->checksum_disabled = 1;
+ cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
+ cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
+
+ cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+ if (cmd->cmdif_rev > CMD_IF_REV) {
+ dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
+ CMD_IF_REV, cmd->cmdif_rev);
+ err = -EOPNOTSUPP;
+ goto err_free_page;
+ }
+
+ spin_lock_init(&cmd->alloc_lock);
+ spin_lock_init(&cmd->token_lock);
+ for (i = 0; i < ARRAY_SIZE(cmd->stats); i++)
+ spin_lock_init(&cmd->stats[i].lock);
+
+ sema_init(&cmd->sem, cmd->max_reg_cmds);
+ sema_init(&cmd->pages_sem, 1);
+
+ cmd_h = (u32)((u64)(cmd->dma) >> 32);
+ cmd_l = (u32)(cmd->dma);
+ if (cmd_l & 0xfff) {
+ dev_err(&dev->pdev->dev, "invalid command queue address\n");
+ err = -ENOMEM;
+ goto err_free_page;
+ }
+
+ iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
+ iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
+
+ /* Make sure firmware sees the complete address before we proceed */
+ wmb();
+
+ mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
+
+ cmd->mode = CMD_MODE_POLLING;
+
+ create_msg_cache(dev);
+
+ set_wqname(dev);
+ cmd->wq = create_singlethread_workqueue(cmd->wq_name);
+ if (!cmd->wq) {
+ dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
+ err = -ENOMEM;
+ goto err_cache;
+ }
+
+ err = create_debugfs_files(dev);
+ if (err) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ return 0;
+
+err_wq:
+ destroy_workqueue(cmd->wq);
+
+err_cache:
+ destroy_msg_cache(dev);
+
+err_free_page:
+ free_cmd_page(dev, cmd);
+
+err_free_pool:
+ dma_pool_destroy(cmd->pool);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_init);
+
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+
+ clean_debug_files(dev);
+ destroy_workqueue(cmd->wq);
+ destroy_msg_cache(dev);
+ free_cmd_page(dev, cmd);
+ dma_pool_destroy(cmd->pool);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
new file mode 100644
index 000000000..a4179122a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/cq.h>
+#include "mlx5_core.h"
+
+#define TASKLET_MAX_TIME 2
+#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
+
+void mlx5_cq_tasklet_cb(unsigned long data)
+{
+ unsigned long flags;
+ unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
+ struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
+ struct mlx5_core_cq *mcq;
+ struct mlx5_core_cq *temp;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ list_splice_tail_init(&ctx->list, &ctx->process_list);
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ list_for_each_entry_safe(mcq, temp, &ctx->process_list,
+ tasklet_ctx.list) {
+ list_del_init(&mcq->tasklet_ctx.list);
+ mcq->tasklet_ctx.comp(mcq);
+ mlx5_cq_put(mcq);
+ if (time_after(jiffies, end))
+ break;
+ }
+
+ if (!list_empty(&ctx->process_list))
+ tasklet_schedule(&ctx->task);
+}
+
+static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
+{
+ unsigned long flags;
+ struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
+
+ spin_lock_irqsave(&tasklet_ctx->lock, flags);
+ /* When migrating CQs between EQs will be implemented, please note
+ * that you need to sync this point. It is possible that
+ * while migrating a CQ, completions on the old EQs could
+ * still arrive.
+ */
+ if (list_empty_careful(&cq->tasklet_ctx.list)) {
+ mlx5_cq_hold(cq);
+ list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
+ }
+ spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
+}
+
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen)
+{
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
+ struct mlx5_eq *eq;
+ int err;
+
+ eq = mlx5_eqn2eq(dev, eqn);
+ if (IS_ERR(eq))
+ return PTR_ERR(eq);
+
+ memset(out, 0, sizeof(out));
+ MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err)
+ return err;
+
+ cq->cqn = MLX5_GET(create_cq_out, out, cqn);
+ cq->cons_index = 0;
+ cq->arm_sn = 0;
+ cq->eq = eq;
+ refcount_set(&cq->refcount, 1);
+ init_completion(&cq->free);
+ if (!cq->comp)
+ cq->comp = mlx5_add_cq_to_tasklet;
+ /* assuming CQ will be deleted before the EQ */
+ cq->tasklet_ctx.priv = &eq->tasklet_ctx;
+ INIT_LIST_HEAD(&cq->tasklet_ctx.list);
+
+ /* Add to comp EQ CQ tree to recv comp events */
+ err = mlx5_eq_add_cq(eq, cq);
+ if (err)
+ goto err_cmd;
+
+ /* Add to async EQ CQ tree to recv async events */
+ err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ goto err_cq_add;
+
+ cq->pid = current->pid;
+ err = mlx5_debug_cq_add(dev, cq);
+ if (err)
+ mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n",
+ cq->cqn);
+
+ cq->uar = dev->priv.uar;
+
+ return 0;
+
+err_cq_add:
+ mlx5_eq_del_cq(eq, cq);
+err_cmd:
+ memset(din, 0, sizeof(din));
+ memset(dout, 0, sizeof(dout));
+ MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+ mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_cq);
+
+int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
+ int err;
+
+ err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ return err;
+
+ err = mlx5_eq_del_cq(cq->eq, cq);
+ if (err)
+ return err;
+
+ MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ synchronize_irq(cq->irqn);
+
+ mlx5_debug_cq_remove(dev, cq);
+ mlx5_cq_put(cq);
+ wait_for_completion(&cq->free);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_cq);
+
+int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0};
+
+ MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ);
+ MLX5_SET(query_cq_in, in, cqn, cq->cqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_cq);
+
+int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
+
+ MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_cq);
+
+int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
+ struct mlx5_core_cq *cq,
+ u16 cq_period,
+ u16 cq_max_count)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0};
+ void *cqc;
+
+ MLX5_SET(modify_cq_in, in, cqn, cq->cqn);
+ cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, cq_period, cq_period);
+ MLX5_SET(cqc, cqc, cq_max_count, cq_max_count);
+ MLX5_SET(modify_cq_in, in,
+ modify_field_select_resize_field_select.modify_field_select.modify_field_select,
+ MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+
+ return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
+}
+EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
new file mode 100644
index 000000000..90fabd612
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+enum {
+ QP_PID,
+ QP_STATE,
+ QP_XPORT,
+ QP_MTU,
+ QP_N_RECV,
+ QP_RECV_SZ,
+ QP_N_SEND,
+ QP_LOG_PG_SZ,
+ QP_RQPN,
+};
+
+static char *qp_fields[] = {
+ [QP_PID] = "pid",
+ [QP_STATE] = "state",
+ [QP_XPORT] = "transport",
+ [QP_MTU] = "mtu",
+ [QP_N_RECV] = "num_recv",
+ [QP_RECV_SZ] = "rcv_wqe_sz",
+ [QP_N_SEND] = "num_send",
+ [QP_LOG_PG_SZ] = "log2_page_sz",
+ [QP_RQPN] = "remote_qpn",
+};
+
+enum {
+ EQ_NUM_EQES,
+ EQ_INTR,
+ EQ_LOG_PG_SZ,
+};
+
+static char *eq_fields[] = {
+ [EQ_NUM_EQES] = "num_eqes",
+ [EQ_INTR] = "intr",
+ [EQ_LOG_PG_SZ] = "log_page_size",
+};
+
+enum {
+ CQ_PID,
+ CQ_NUM_CQES,
+ CQ_LOG_PG_SZ,
+};
+
+static char *cq_fields[] = {
+ [CQ_PID] = "pid",
+ [CQ_NUM_CQES] = "num_cqes",
+ [CQ_LOG_PG_SZ] = "log_page_size",
+};
+
+struct dentry *mlx5_debugfs_root;
+EXPORT_SYMBOL(mlx5_debugfs_root);
+
+void mlx5_register_debugfs(void)
+{
+ mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
+ if (IS_ERR_OR_NULL(mlx5_debugfs_root))
+ mlx5_debugfs_root = NULL;
+}
+
+void mlx5_unregister_debugfs(void)
+{
+ debugfs_remove(mlx5_debugfs_root);
+}
+
+int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ atomic_set(&dev->num_qps, 0);
+
+ dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root);
+ if (!dev->priv.qp_debugfs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.qp_debugfs);
+}
+
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root);
+ if (!dev->priv.eq_debugfs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.eq_debugfs);
+}
+
+static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cmd_stats *stats;
+ u64 field = 0;
+ int ret;
+ char tbuf[22];
+
+ stats = filp->private_data;
+ spin_lock_irq(&stats->lock);
+ if (stats->n)
+ field = div64_u64(stats->sum, stats->n);
+ spin_unlock_irq(&stats->lock);
+ ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static ssize_t average_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_cmd_stats *stats;
+
+ stats = filp->private_data;
+ spin_lock_irq(&stats->lock);
+ stats->sum = 0;
+ stats->n = 0;
+ spin_unlock_irq(&stats->lock);
+
+ *pos += count;
+
+ return count;
+}
+
+static const struct file_operations stats_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = average_read,
+ .write = average_write,
+};
+
+int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_stats *stats;
+ struct dentry **cmd;
+ const char *namep;
+ int err;
+ int i;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ cmd = &dev->priv.cmdif_debugfs;
+ *cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
+ if (!*cmd)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
+ stats = &dev->cmd.stats[i];
+ namep = mlx5_command_str(i);
+ if (strcmp(namep, "unknown command opcode")) {
+ stats->root = debugfs_create_dir(namep, *cmd);
+ if (!stats->root) {
+ mlx5_core_warn(dev, "failed adding command %d\n",
+ i);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ stats->avg = debugfs_create_file("average", 0400,
+ stats->root, stats,
+ &stats_fops);
+ if (!stats->avg) {
+ mlx5_core_warn(dev, "failed creating debugfs file\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ stats->count = debugfs_create_u64("n", 0400,
+ stats->root,
+ &stats->n);
+ if (!stats->count) {
+ mlx5_core_warn(dev, "failed creating debugfs file\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+
+ return 0;
+out:
+ debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+ return err;
+}
+
+void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+}
+
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root);
+ if (!dev->priv.cq_debugfs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.cq_debugfs);
+}
+
+static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+ int index, int *is_str)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
+ struct mlx5_qp_context *ctx;
+ u64 param = 0;
+ u32 *out;
+ int err;
+ int no_sq;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ err = mlx5_core_qp_query(dev, qp, out, outlen);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query qp err=%d\n", err);
+ goto out;
+ }
+
+ *is_str = 0;
+
+ /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
+ ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc);
+
+ switch (index) {
+ case QP_PID:
+ param = qp->pid;
+ break;
+ case QP_STATE:
+ param = (unsigned long)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
+ *is_str = 1;
+ break;
+ case QP_XPORT:
+ param = (unsigned long)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
+ *is_str = 1;
+ break;
+ case QP_MTU:
+ switch (ctx->mtu_msgmax >> 5) {
+ case IB_MTU_256:
+ param = 256;
+ break;
+ case IB_MTU_512:
+ param = 512;
+ break;
+ case IB_MTU_1024:
+ param = 1024;
+ break;
+ case IB_MTU_2048:
+ param = 2048;
+ break;
+ case IB_MTU_4096:
+ param = 4096;
+ break;
+ default:
+ param = 0;
+ }
+ break;
+ case QP_N_RECV:
+ param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+ break;
+ case QP_RECV_SZ:
+ param = 1 << ((ctx->rq_size_stride & 7) + 4);
+ break;
+ case QP_N_SEND:
+ no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15;
+ if (!no_sq)
+ param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11);
+ else
+ param = 0;
+ break;
+ case QP_LOG_PG_SZ:
+ param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f;
+ param += 12;
+ break;
+ case QP_RQPN:
+ param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff;
+ break;
+ }
+
+out:
+ kfree(out);
+ return param;
+}
+
+static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ int index)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
+ u64 param = 0;
+ void *ctx;
+ u32 *out;
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ err = mlx5_core_eq_query(dev, eq, out, outlen);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query eq\n");
+ goto out;
+ }
+ ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);
+
+ switch (index) {
+ case EQ_NUM_EQES:
+ param = 1 << MLX5_GET(eqc, ctx, log_eq_size);
+ break;
+ case EQ_INTR:
+ param = MLX5_GET(eqc, ctx, intr);
+ break;
+ case EQ_LOG_PG_SZ:
+ param = MLX5_GET(eqc, ctx, log_page_size) + 12;
+ break;
+ }
+
+out:
+ kfree(out);
+ return param;
+}
+
+static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ int index)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cq_out);
+ u64 param = 0;
+ void *ctx;
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ err = mlx5_core_query_cq(dev, cq, out, outlen);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query cq\n");
+ goto out;
+ }
+ ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+
+ switch (index) {
+ case CQ_PID:
+ param = cq->pid;
+ break;
+ case CQ_NUM_CQES:
+ param = 1 << MLX5_GET(cqc, ctx, log_cq_size);
+ break;
+ case CQ_LOG_PG_SZ:
+ param = MLX5_GET(cqc, ctx, log_page_size);
+ break;
+ }
+
+out:
+ kvfree(out);
+ return param;
+}
+
+static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_field_desc *desc;
+ struct mlx5_rsc_debug *d;
+ char tbuf[18];
+ int is_str = 0;
+ u64 field;
+ int ret;
+
+ desc = filp->private_data;
+ d = (void *)(desc - desc->i) - sizeof(*d);
+ switch (d->type) {
+ case MLX5_DBG_RSC_QP:
+ field = qp_read_field(d->dev, d->object, desc->i, &is_str);
+ break;
+
+ case MLX5_DBG_RSC_EQ:
+ field = eq_read_field(d->dev, d->object, desc->i);
+ break;
+
+ case MLX5_DBG_RSC_CQ:
+ field = cq_read_field(d->dev, d->object, desc->i);
+ break;
+
+ default:
+ mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type);
+ return -EINVAL;
+ }
+
+ if (is_str)
+ ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field);
+ else
+ ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
+
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = dbg_read,
+};
+
+static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
+ struct dentry *root, struct mlx5_rsc_debug **dbg,
+ int rsn, char **field, int nfile, void *data)
+{
+ struct mlx5_rsc_debug *d;
+ char resn[32];
+ int err;
+ int i;
+
+ d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->dev = dev;
+ d->object = data;
+ d->type = type;
+ sprintf(resn, "0x%x", rsn);
+ d->root = debugfs_create_dir(resn, root);
+ if (!d->root) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ for (i = 0; i < nfile; i++) {
+ d->fields[i].i = i;
+ d->fields[i].dent = debugfs_create_file(field[i], 0400,
+ d->root, &d->fields[i],
+ &fops);
+ if (!d->fields[i].dent) {
+ err = -ENOMEM;
+ goto out_rem;
+ }
+ }
+ *dbg = d;
+
+ return 0;
+out_rem:
+ debugfs_remove_recursive(d->root);
+
+out_free:
+ kfree(d);
+ return err;
+}
+
+static void rem_res_tree(struct mlx5_rsc_debug *d)
+{
+ debugfs_remove_recursive(d->root);
+ kfree(d);
+}
+
+int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs,
+ &qp->dbg, qp->qpn, qp_fields,
+ ARRAY_SIZE(qp_fields), qp);
+ if (err)
+ qp->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (qp->dbg)
+ rem_res_tree(qp->dbg);
+}
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs,
+ &eq->dbg, eq->eqn, eq_fields,
+ ARRAY_SIZE(eq_fields), eq);
+ if (err)
+ eq->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (eq->dbg)
+ rem_res_tree(eq->dbg);
+}
+
+int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs,
+ &cq->dbg, cq->cqn, cq_fields,
+ ARRAY_SIZE(cq_fields), cq);
+ if (err)
+ cq->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (cq->dbg)
+ rem_res_tree(cq->dbg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
new file mode 100644
index 000000000..3692d6a1c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+static LIST_HEAD(intf_list);
+static LIST_HEAD(mlx5_dev_list);
+/* intf dev list mutex */
+static DEFINE_MUTEX(mlx5_intf_mutex);
+
+struct mlx5_device_context {
+ struct list_head list;
+ struct mlx5_interface *intf;
+ void *context;
+ unsigned long state;
+};
+
+struct mlx5_delayed_event {
+ struct list_head list;
+ struct mlx5_core_dev *dev;
+ enum mlx5_dev_event event;
+ unsigned long param;
+};
+
+enum {
+ MLX5_INTERFACE_ADDED,
+ MLX5_INTERFACE_ATTACHED,
+};
+
+static void add_delayed_event(struct mlx5_priv *priv,
+ struct mlx5_core_dev *dev,
+ enum mlx5_dev_event event,
+ unsigned long param)
+{
+ struct mlx5_delayed_event *delayed_event;
+
+ delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
+ if (!delayed_event) {
+ mlx5_core_err(dev, "event %d is missed\n", event);
+ return;
+ }
+
+ mlx5_core_dbg(dev, "Accumulating event %d\n", event);
+ delayed_event->dev = dev;
+ delayed_event->event = event;
+ delayed_event->param = param;
+ list_add_tail(&delayed_event->list, &priv->waiting_events_list);
+}
+
+static void delayed_event_release(struct mlx5_device_context *dev_ctx,
+ struct mlx5_priv *priv)
+{
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+ struct mlx5_delayed_event *de;
+ struct mlx5_delayed_event *n;
+ struct list_head temp;
+
+ INIT_LIST_HEAD(&temp);
+
+ spin_lock_irq(&priv->ctx_lock);
+
+ priv->is_accum_events = false;
+ list_splice_init(&priv->waiting_events_list, &temp);
+ if (!dev_ctx->context)
+ goto out;
+ list_for_each_entry_safe(de, n, &temp, list)
+ dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
+
+out:
+ spin_unlock_irq(&priv->ctx_lock);
+
+ list_for_each_entry_safe(de, n, &temp, list) {
+ list_del(&de->list);
+ kfree(de);
+ }
+}
+
+/* accumulating events that can come after mlx5_ib calls to
+ * ib_register_device, till adding that interface to the events list.
+ */
+static void delayed_event_start(struct mlx5_priv *priv)
+{
+ spin_lock_irq(&priv->ctx_lock);
+ priv->is_accum_events = true;
+ spin_unlock_irq(&priv->ctx_lock);
+}
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ if (!mlx5_lag_intf_add(intf, priv))
+ return;
+
+ dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
+ if (!dev_ctx)
+ return;
+
+ dev_ctx->intf = intf;
+
+ delayed_event_start(priv);
+
+ dev_ctx->context = intf->add(dev);
+ if (dev_ctx->context) {
+ set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+ if (intf->attach)
+ set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+
+ spin_lock_irq(&priv->ctx_lock);
+ list_add_tail(&dev_ctx->list, &priv->ctx_list);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (dev_ctx->intf->pfault) {
+ if (priv->pfault) {
+ mlx5_core_err(dev, "multiple page fault handlers not supported");
+ } else {
+ priv->pfault_ctx = dev_ctx->context;
+ priv->pfault = dev_ctx->intf->pfault;
+ }
+ }
+#endif
+ spin_unlock_irq(&priv->ctx_lock);
+ }
+
+ delayed_event_release(dev_ctx, priv);
+
+ if (!dev_ctx->context)
+ kfree(dev_ctx);
+}
+
+static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
+ struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+
+ list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+ if (dev_ctx->intf == intf)
+ return dev_ctx;
+ return NULL;
+}
+
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ dev_ctx = mlx5_get_device(intf, priv);
+ if (!dev_ctx)
+ return;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ spin_lock_irq(&priv->ctx_lock);
+ if (priv->pfault == dev_ctx->intf->pfault)
+ priv->pfault = NULL;
+ spin_unlock_irq(&priv->ctx_lock);
+
+ synchronize_srcu(&priv->pfault_srcu);
+#endif
+
+ spin_lock_irq(&priv->ctx_lock);
+ list_del(&dev_ctx->list);
+ spin_unlock_irq(&priv->ctx_lock);
+
+ if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ intf->remove(dev, dev_ctx->context);
+
+ kfree(dev_ctx);
+}
+
+static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ dev_ctx = mlx5_get_device(intf, priv);
+ if (!dev_ctx)
+ return;
+
+ delayed_event_start(priv);
+ if (intf->attach) {
+ if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+ goto out;
+ if (intf->attach(dev, dev_ctx->context))
+ goto out;
+
+ set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+ } else {
+ if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ goto out;
+ dev_ctx->context = intf->add(dev);
+ if (!dev_ctx->context)
+ goto out;
+
+ set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+ }
+
+out:
+ delayed_event_release(dev_ctx, priv);
+}
+
+void mlx5_attach_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(intf, &intf_list, list)
+ mlx5_attach_interface(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ dev_ctx = mlx5_get_device(intf, priv);
+ if (!dev_ctx)
+ return;
+
+ if (intf->detach) {
+ if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+ return;
+ intf->detach(dev, dev_ctx->context);
+ clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+ } else {
+ if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ return;
+ intf->remove(dev, dev_ctx->context);
+ clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+ }
+}
+
+void mlx5_detach_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(intf, &intf_list, list)
+ mlx5_detach_interface(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+bool mlx5_device_registered(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv;
+ bool found = false;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+ if (priv == &dev->priv)
+ found = true;
+ mutex_unlock(&mlx5_intf_mutex);
+
+ return found;
+}
+
+int mlx5_register_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_add_tail(&priv->dev_list, &mlx5_dev_list);
+ list_for_each_entry(intf, &intf_list, list)
+ mlx5_add_device(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+
+ return 0;
+}
+
+void mlx5_unregister_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry_reverse(intf, &intf_list, list)
+ mlx5_remove_device(intf, priv);
+ list_del(&priv->dev_list);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_register_interface(struct mlx5_interface *intf)
+{
+ struct mlx5_priv *priv;
+
+ if (!intf->add || !intf->remove)
+ return -EINVAL;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_add_tail(&intf->list, &intf_list);
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+ mlx5_add_device(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_register_interface);
+
+void mlx5_unregister_interface(struct mlx5_interface *intf)
+{
+ struct mlx5_priv *priv;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+ mlx5_remove_device(intf, priv);
+ list_del(&intf->list);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+EXPORT_SYMBOL(mlx5_unregister_interface);
+
+/* Must be called with intf_mutex held */
+static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_interface *intf;
+ bool found = false;
+
+ list_for_each_entry(intf, &intf_list, list) {
+ if (intf->protocol == protocol) {
+ dev_ctx = mlx5_get_device(intf, &mdev->priv);
+ if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ found = true;
+ break;
+ }
+ }
+
+ return found;
+}
+
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+ mutex_lock(&mlx5_intf_mutex);
+ if (mlx5_has_added_dev_by_protocol(mdev, protocol)) {
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ }
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ struct mlx5_device_context *dev_ctx;
+ unsigned long flags;
+ void *result = NULL;
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+
+ list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
+ if ((dev_ctx->intf->protocol == protocol) &&
+ dev_ctx->intf->get_dev) {
+ result = dev_ctx->intf->get_dev(dev_ctx->context);
+ break;
+ }
+
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+ return result;
+}
+EXPORT_SYMBOL(mlx5_get_protocol_dev);
+
+/* Must be called with intf_mutex held */
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+ struct mlx5_interface *intf;
+
+ list_for_each_entry(intf, &intf_list, list)
+ if (intf->protocol == protocol) {
+ mlx5_add_device(intf, &dev->priv);
+ break;
+ }
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+ struct mlx5_interface *intf;
+
+ list_for_each_entry(intf, &intf_list, list)
+ if (intf->protocol == protocol) {
+ mlx5_remove_device(intf, &dev->priv);
+ break;
+ }
+}
+
+static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
+{
+ return (u32)((pci_domain_nr(dev->pdev->bus) << 16) |
+ (dev->pdev->bus->number << 8) |
+ PCI_SLOT(dev->pdev->devfn));
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+{
+ u32 pci_id = mlx5_gen_pci_id(dev);
+ struct mlx5_core_dev *res = NULL;
+ struct mlx5_core_dev *tmp_dev;
+ struct mlx5_priv *priv;
+
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
+ tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+ if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
+ res = tmp_dev;
+ break;
+ }
+ }
+
+ return res;
+}
+
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+ unsigned long param)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_device_context *dev_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+
+ if (priv->is_accum_events)
+ add_delayed_event(priv, dev, event, param);
+
+ /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
+ * still in priv->ctx_list. In this case, only notify the dev_ctx if its
+ * ADDED or ATTACHED bit are set.
+ */
+ list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+ if (dev_ctx->intf->event &&
+ (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
+ test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
+ dev_ctx->intf->event(dev, dev_ctx->context, event, param);
+
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+}
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_core_page_fault(struct mlx5_core_dev *dev,
+ struct mlx5_pagefault *pfault)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&priv->pfault_srcu);
+ if (priv->pfault)
+ priv->pfault(dev, priv->pfault_ctx, pfault);
+ srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
+}
+#endif
+
+void mlx5_dev_list_lock(void)
+{
+ mutex_lock(&mlx5_intf_mutex);
+}
+
+void mlx5_dev_list_unlock(void)
+{
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_dev_list_trylock(void)
+{
+ return mutex_trylock(&mlx5_intf_mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
new file mode 100644
index 000000000..0f11fff32
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include "fs_tracepoint.h"
+#include <linux/stringify.h>
+
+#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name
+#define MASK_VAL(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET(spec, mask, fld),\
+ .v = MLX5_GET(spec, val, fld)}
+#define MASK_VAL_BE(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET_BE(type, spec, mask, fld),\
+ .v = MLX5_GET_BE(type, spec, val, fld)}
+#define GET_MASKED_VAL(name) (name.m & name.v)
+
+#define GET_MASK_VAL(name, type, mask, val, fld) \
+ (name.m = MLX5_GET(type, mask, fld), \
+ name.v = MLX5_GET(type, val, fld), \
+ name.m & name.v)
+#define PRINT_MASKED_VAL(name, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \
+ }
+#define PRINT_MASKED_VALP(name, cast, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", \
+ (cast)&name.v);\
+ }
+
+static void print_lyr_2_4_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_L2(type, name, fld) \
+ MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ DECLARE_MASK_VAL(u64, smac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)};
+ DECLARE_MASK_VAL(u64, dmac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)};
+ MASK_VAL_L2(u16, ethertype, ethertype);
+
+ PRINT_MASKED_VALP(smac, u8 *, p, "%pM");
+ PRINT_MASKED_VALP(dmac, u8 *, p, "%pM");
+ PRINT_MASKED_VAL(ethertype, p, "%04x");
+
+ if (ethertype.m == 0xffff) {
+ if (ethertype.v == ETH_P_IP) {
+#define MASK_VAL_L2_BE(type, name, fld) \
+ MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ MASK_VAL_L2_BE(u32, src_ipv4,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, dst_ipv4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
+ "%pI4");
+ PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
+ "%pI4");
+ } else if (ethertype.v == ETH_P_IPV6) {
+ static const struct in6_addr full_ones = {
+ .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff)},
+ };
+ DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
+ DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
+
+ memcpy(src_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.m));
+ memcpy(dst_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.m));
+ memcpy(src_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.v));
+ memcpy(dst_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.v));
+
+ if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "src_ipv6=%pI6 ",
+ &src_ipv6.v);
+ if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "dst_ipv6=%pI6 ",
+ &dst_ipv6.v);
+ }
+ }
+
+#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\
+ MASK_VAL_L2(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+
+ PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x");
+ PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x");
+ PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x");
+ PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x");
+ PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d");
+}
+
+static void print_misc_parameters_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_MISC(type, name, fld) \
+ MASK_VAL(type, fte_match_set_misc, name, mask, value, fld)
+#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\
+ MASK_VAL_MISC(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+ DECLARE_MASK_VAL(u64, gre_key) = {
+ .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
+ MLX5_GET(fte_match_set_misc, mask, gre_key_l),
+ .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
+ MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+
+ PRINT_MASKED_VAL(gre_key, p, "%llu");
+ PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag,
+ outer_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag,
+ inner_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag,
+ outer_second_svlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag,
+ inner_second_svlan_tag, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u");
+ PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label,
+ p, "%x");
+ PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label,
+ p, "%x");
+}
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
+ trace_seq_printf(p, "[outer] ");
+ print_lyr_2_4_hdrs(p, mask_outer, value_outer);
+ }
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
+ trace_seq_printf(p, "[misc] ");
+ print_misc_parameters_hdrs(p, mask_misc, value_misc);
+ }
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
+ trace_seq_printf(p, "[inner] ");
+ print_lyr_2_4_hdrs(p, mask_inner, value_inner);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ switch (dst->type) {
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ trace_seq_printf(p, "vport=%u\n", dst->vport.num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ trace_seq_printf(p, "ft=%p\n", dst->ft);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ trace_seq_printf(p, "ft_num=%u\n", dst->ft_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
+ trace_seq_printf(p, "tir=%u\n", dst->tir_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
+ trace_seq_printf(p, "counter_id=%u\n", counter_id);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
+ trace_seq_printf(p, "port\n");
+ break;
+ }
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule);
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
new file mode 100644
index 000000000..0240aee91
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_FS_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include "../fs_core.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \
+ vinner, vmisc) \
+ parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\
+ vinner, vmisc)
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner);
+
+#define __parse_fs_dst(dst, counter_id) \
+ parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id)
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id);
+
+TRACE_EVENT(mlx5_fs_add_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, start_index)
+ __field(u32, end_index)
+ __field(u32, id)
+ __field(u8, mask_enable)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ fs_get_obj(__entry->ft, fg->node.parent);
+ __entry->start_index = fg->start_index;
+ __entry->end_index = fg->start_index + fg->max_ftes;
+ __entry->id = fg->id;
+ __entry->mask_enable = fg->mask.match_criteria_enable;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+
+ ),
+ TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n",
+ __entry->fg, __entry->ft, __entry->id,
+ __entry->start_index, __entry->end_index,
+ __entry->mask_enable,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, id)
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ __entry->id = fg->id;
+
+ ),
+ TP_printk("fg=%p id=%u\n",
+ __entry->fg, __entry->id)
+ );
+
+#define ACTION_FLAGS \
+ {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\
+ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\
+ {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2, "VLAN_PUSH_2"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2, "VLAN_POP_2"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
+
+TRACE_EVENT(mlx5_fs_set_fte,
+ TP_PROTO(const struct fs_fte *fte, int new_fte),
+ TP_ARGS(fte, new_fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, group_index)
+ __field(u32, index)
+ __field(u32, action)
+ __field(u32, flow_tag)
+ __field(u8, mask_enable)
+ __field(int, new_fte)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->new_fte = new_fte;
+ fs_get_obj(__entry->fg, fte->node.parent);
+ __entry->group_index = __entry->fg->id;
+ __entry->index = fte->index;
+ __entry->action = fte->action.action;
+ __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
+ __entry->flow_tag = fte->action.flow_tag;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+ memcpy(__entry->value_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ outer_headers),
+ sizeof(__entry->value_outer));
+ memcpy(__entry->value_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ inner_headers),
+ sizeof(__entry->value_inner));
+ memcpy(__entry->value_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ misc_parameters),
+ sizeof(__entry->value_misc));
+
+ ),
+ TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n",
+ __entry->new_fte ? "add" : "set",
+ __entry->fte, __entry->fg, __entry->index,
+ __entry->group_index, __print_flags(__entry->action, "|",
+ ACTION_FLAGS),
+ __entry->flow_tag,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->value_outer,
+ __entry->value_misc,
+ __entry->value_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fte,
+ TP_PROTO(const struct fs_fte *fte),
+ TP_ARGS(fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(u32, index)
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->index = fte->index;
+
+ ),
+ TP_printk("fte=%p index=%u\n",
+ __entry->fte, __entry->index)
+ );
+
+TRACE_EVENT(mlx5_fs_add_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ __field(u32, sw_action)
+ __field(u32, index)
+ __field(u32, counter_id)
+ __array(u8, destination, sizeof(struct mlx5_flow_destination))
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ __entry->index = __entry->fte->dests_size - 1;
+ __entry->sw_action = rule->sw_action;
+ memcpy(__entry->destination,
+ &rule->dest_attr,
+ sizeof(__entry->destination));
+ if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
+ rule->dest_attr.counter)
+ __entry->counter_id =
+ rule->dest_attr.counter->id;
+ ),
+ TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
+ __entry->rule, __entry->fte, __entry->index,
+ __print_flags(__entry->sw_action, "|", ACTION_FLAGS),
+ __parse_fs_dst(__entry->destination, __entry->counter_id))
+ );
+
+TRACE_EVENT(mlx5_fs_del_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ ),
+ TP_printk("rule=%p fte=%p\n",
+ __entry->rule, __entry->fte)
+ );
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE fs_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
new file mode 100644
index 000000000..a22e932a0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define CREATE_TRACE_POINTS
+#include "fw_tracer.h"
+#include "fw_tracer_tracepoint.h"
+
+static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
+{
+ u32 *string_db_base_address_out = tracer->str_db.base_address_out;
+ u32 *string_db_size_out = tracer->str_db.size_out;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ void *mtrc_cap_sp;
+ int err, i;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CAP, 0, 0);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n",
+ err);
+ return err;
+ }
+
+ if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
+ mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
+ return -ENOTSUPP;
+ }
+
+ tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
+ tracer->str_db.first_string_trace =
+ MLX5_GET(mtrc_cap, out, first_string_trace);
+ tracer->str_db.num_string_trace =
+ MLX5_GET(mtrc_cap, out, num_string_trace);
+ tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
+ tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+ for (i = 0; i < tracer->str_db.num_string_db; i++) {
+ mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
+ string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param,
+ mtrc_cap_sp,
+ string_db_base_address);
+ string_db_size_out[i] = MLX5_GET(mtrc_string_db_param,
+ mtrc_cap_sp, string_db_size);
+ }
+
+ return err;
+}
+
+static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer,
+ u32 *out, u32 out_size,
+ u8 trace_owner)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+ MLX5_SET(mtrc_cap, in, trace_owner, trace_owner);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size,
+ MLX5_REG_MTRC_CAP, 0, 1);
+}
+
+static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ int err;
+
+ err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+ MLX5_FW_TRACER_ACQUIRE_OWNERSHIP);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n",
+ err);
+ return err;
+ }
+
+ tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+ if (!tracer->owner)
+ return -EBUSY;
+
+ return 0;
+}
+
+static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
+{
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+ mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+ MLX5_FW_TRACER_RELEASE_OWNERSHIP);
+ tracer->owner = false;
+}
+
+static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct device *ddev = &dev->pdev->dev;
+ dma_addr_t dma;
+ void *buff;
+ gfp_t gfp;
+ int err;
+
+ tracer->buff.size = TRACE_BUFFER_SIZE_BYTE;
+
+ gfp = GFP_KERNEL | __GFP_ZERO;
+ buff = (void *)__get_free_pages(gfp,
+ get_order(tracer->buff.size));
+ if (!buff) {
+ err = -ENOMEM;
+ mlx5_core_warn(dev, "FWTracer: Failed to allocate pages, %d\n", err);
+ return err;
+ }
+ tracer->buff.log_buf = buff;
+
+ dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
+ dma_mapping_error(ddev, dma));
+ err = -ENOMEM;
+ goto free_pages;
+ }
+ tracer->buff.dma = dma;
+
+ return 0;
+
+free_pages:
+ free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+
+ return err;
+}
+
+static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct device *ddev = &dev->pdev->dev;
+
+ if (!tracer->buff.log_buf)
+ return;
+
+ dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
+ free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+}
+
+static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ int err, inlen, i;
+ __be64 *mtt;
+ void *mkc;
+ u32 *in;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+ sizeof(*mtt) * round_up(TRACER_BUFFER_PAGE_NUM, 2);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+ mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++)
+ mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE);
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, pd, tracer->buff.pdn);
+ MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+ MLX5_SET64(mkc, mkc, start_addr, tracer->buff.dma);
+ MLX5_SET64(mkc, mkc, len, tracer->buff.size);
+ err = mlx5_core_create_mkey(dev, &tracer->buff.mkey, in, inlen);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to create mkey, %d\n", err);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5_fw_tracer_free_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ u32 num_string_db = tracer->str_db.num_string_db;
+ int i;
+
+ for (i = 0; i < num_string_db; i++) {
+ kfree(tracer->str_db.buffer[i]);
+ tracer->str_db.buffer[i] = NULL;
+ }
+}
+
+static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ u32 *string_db_size_out = tracer->str_db.size_out;
+ u32 num_string_db = tracer->str_db.num_string_db;
+ int i;
+
+ for (i = 0; i < num_string_db; i++) {
+ tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL);
+ if (!tracer->str_db.buffer[i])
+ goto free_strings_db;
+ }
+
+ return 0;
+
+free_strings_db:
+ mlx5_fw_tracer_free_strings_db(tracer);
+ return -ENOMEM;
+}
+
+static void mlx5_tracer_read_strings_db(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
+ read_fw_strings_work);
+ u32 num_of_reads, num_string_db = tracer->str_db.num_string_db;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ u32 leftovers, offset;
+ int err = 0, i, j;
+ u32 *out, outlen;
+ void *out_value;
+
+ outlen = MLX5_ST_SZ_BYTES(mtrc_stdb) + STRINGS_DB_READ_SIZE_BYTES;
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < num_string_db; i++) {
+ offset = 0;
+ MLX5_SET(mtrc_stdb, in, string_db_index, i);
+ num_of_reads = tracer->str_db.size_out[i] /
+ STRINGS_DB_READ_SIZE_BYTES;
+ leftovers = (tracer->str_db.size_out[i] %
+ STRINGS_DB_READ_SIZE_BYTES) /
+ STRINGS_DB_LEFTOVER_SIZE_BYTES;
+
+ MLX5_SET(mtrc_stdb, in, read_size, STRINGS_DB_READ_SIZE_BYTES);
+ for (j = 0; j < num_of_reads; j++) {
+ MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ outlen, MLX5_REG_MTRC_STDB,
+ 0, 1);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+ err);
+ goto out_free;
+ }
+
+ out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+ memcpy(tracer->str_db.buffer[i] + offset, out_value,
+ STRINGS_DB_READ_SIZE_BYTES);
+ offset += STRINGS_DB_READ_SIZE_BYTES;
+ }
+
+ /* Strings database is aligned to 64, need to read leftovers*/
+ MLX5_SET(mtrc_stdb, in, read_size,
+ STRINGS_DB_LEFTOVER_SIZE_BYTES);
+ for (j = 0; j < leftovers; j++) {
+ MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ outlen, MLX5_REG_MTRC_STDB,
+ 0, 1);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+ err);
+ goto out_free;
+ }
+
+ out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+ memcpy(tracer->str_db.buffer[i] + offset, out_value,
+ STRINGS_DB_LEFTOVER_SIZE_BYTES);
+ offset += STRINGS_DB_LEFTOVER_SIZE_BYTES;
+ }
+ }
+
+ tracer->str_db.loaded = true;
+
+out_free:
+ kfree(out);
+out:
+ return;
+}
+
+static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_ctrl, in, arm_event, 1);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CTRL, 0, 1);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err);
+}
+
+static const char *VAL_PARM = "%llx";
+static const char *REPLACE_64_VAL_PARM = "%x%x";
+static const char *PARAM_CHAR = "%";
+
+static int mlx5_tracer_message_hash(u32 message_id)
+{
+ return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1);
+}
+
+static struct tracer_string_format *mlx5_tracer_message_insert(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct hlist_head *head =
+ &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+ struct tracer_string_format *cur_string;
+
+ cur_string = kzalloc(sizeof(*cur_string), GFP_KERNEL);
+ if (!cur_string)
+ return NULL;
+
+ hlist_add_head(&cur_string->hlist, head);
+
+ return cur_string;
+}
+
+static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_string_format *cur_string;
+ u32 str_ptr, offset;
+ int i;
+
+ str_ptr = tracer_event->string_event.string_param;
+
+ for (i = 0; i < tracer->str_db.num_string_db; i++) {
+ if (str_ptr > tracer->str_db.base_address_out[i] &&
+ str_ptr < tracer->str_db.base_address_out[i] +
+ tracer->str_db.size_out[i]) {
+ offset = str_ptr - tracer->str_db.base_address_out[i];
+ /* add it to the hash */
+ cur_string = mlx5_tracer_message_insert(tracer, tracer_event);
+ if (!cur_string)
+ return NULL;
+ cur_string->string = (char *)(tracer->str_db.buffer[i] +
+ offset);
+ return cur_string;
+ }
+ }
+
+ return NULL;
+}
+
+static void mlx5_tracer_clean_message(struct tracer_string_format *str_frmt)
+{
+ hlist_del(&str_frmt->hlist);
+ kfree(str_frmt);
+}
+
+static int mlx5_tracer_get_num_of_params(char *str)
+{
+ char *substr, *pstr = str;
+ int num_of_params = 0;
+
+ /* replace %llx with %x%x */
+ substr = strstr(pstr, VAL_PARM);
+ while (substr) {
+ memcpy(substr, REPLACE_64_VAL_PARM, 4);
+ pstr = substr;
+ substr = strstr(pstr, VAL_PARM);
+ }
+
+ /* count all the % characters */
+ substr = strstr(str, PARAM_CHAR);
+ while (substr) {
+ num_of_params += 1;
+ str = substr + 1;
+ substr = strstr(str, PARAM_CHAR);
+ }
+
+ return num_of_params;
+}
+
+static struct tracer_string_format *mlx5_tracer_message_find(struct hlist_head *head,
+ u8 event_id, u32 tmsn)
+{
+ struct tracer_string_format *message;
+
+ hlist_for_each_entry(message, head, hlist)
+ if (message->event_id == event_id && message->tmsn == tmsn)
+ return message;
+
+ return NULL;
+}
+
+static struct tracer_string_format *mlx5_tracer_message_get(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct hlist_head *head =
+ &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+
+ return mlx5_tracer_message_find(head, tracer_event->event_id, tracer_event->string_event.tmsn);
+}
+
+static void poll_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event, u64 *trace)
+{
+ u32 timestamp_low, timestamp_mid, timestamp_high, urts;
+
+ tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id);
+ tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost);
+
+ switch (tracer_event->event_id) {
+ case TRACER_EVENT_TYPE_TIMESTAMP:
+ tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP;
+ urts = MLX5_GET(tracer_timestamp_event, trace, urts);
+ if (tracer->trc_ver == 0)
+ tracer_event->timestamp_event.unreliable = !!(urts >> 2);
+ else
+ tracer_event->timestamp_event.unreliable = !!(urts & 1);
+
+ timestamp_low = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp7_0);
+ timestamp_mid = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp39_8);
+ timestamp_high = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp52_40);
+
+ tracer_event->timestamp_event.timestamp =
+ ((u64)timestamp_high << 40) |
+ ((u64)timestamp_mid << 8) |
+ (u64)timestamp_low;
+ break;
+ default:
+ if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
+ tracer_event->event_id <= tracer->str_db.first_string_trace +
+ tracer->str_db.num_string_trace) {
+ tracer_event->type = TRACER_EVENT_TYPE_STRING;
+ tracer_event->string_event.timestamp =
+ MLX5_GET(tracer_string_event, trace, timestamp);
+ tracer_event->string_event.string_param =
+ MLX5_GET(tracer_string_event, trace, string_param);
+ tracer_event->string_event.tmsn =
+ MLX5_GET(tracer_string_event, trace, tmsn);
+ tracer_event->string_event.tdsn =
+ MLX5_GET(tracer_string_event, trace, tdsn);
+ } else {
+ tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED;
+ }
+ break;
+ }
+}
+
+static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event)
+{
+ struct tracer_event tracer_event;
+ u8 event_id;
+
+ event_id = MLX5_GET(tracer_event, ts_event, event_id);
+
+ if (event_id == TRACER_EVENT_TYPE_TIMESTAMP)
+ poll_trace(tracer, &tracer_event, ts_event);
+ else
+ tracer_event.timestamp_event.timestamp = 0;
+
+ return tracer_event.timestamp_event.timestamp;
+}
+
+static void mlx5_fw_tracer_clean_print_hash(struct mlx5_fw_tracer *tracer)
+{
+ struct tracer_string_format *str_frmt;
+ struct hlist_node *n;
+ int i;
+
+ for (i = 0; i < MESSAGE_HASH_SIZE; i++) {
+ hlist_for_each_entry_safe(str_frmt, n, &tracer->hash[i], hlist)
+ mlx5_tracer_clean_message(str_frmt);
+ }
+}
+
+static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
+{
+ struct tracer_string_format *str_frmt, *tmp_str;
+
+ list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list,
+ list)
+ list_del(&str_frmt->list);
+}
+
+static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
+ struct mlx5_core_dev *dev,
+ u64 trace_timestamp)
+{
+ char tmp[512];
+
+ snprintf(tmp, sizeof(tmp), str_frmt->string,
+ str_frmt->params[0],
+ str_frmt->params[1],
+ str_frmt->params[2],
+ str_frmt->params[3],
+ str_frmt->params[4],
+ str_frmt->params[5],
+ str_frmt->params[6]);
+
+ trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
+ str_frmt->event_id, tmp);
+
+ /* remove it from hash */
+ mlx5_tracer_clean_message(str_frmt);
+}
+
+static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_string_format *cur_string;
+
+ if (tracer_event->string_event.tdsn == 0) {
+ cur_string = mlx5_tracer_get_string(tracer, tracer_event);
+ if (!cur_string)
+ return -1;
+
+ cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
+ cur_string->last_param_num = 0;
+ cur_string->event_id = tracer_event->event_id;
+ cur_string->tmsn = tracer_event->string_event.tmsn;
+ cur_string->timestamp = tracer_event->string_event.timestamp;
+ cur_string->lost = tracer_event->lost_event;
+ if (cur_string->num_of_params == 0) /* trace with no params */
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ } else {
+ cur_string = mlx5_tracer_message_get(tracer, tracer_event);
+ if (!cur_string) {
+ pr_debug("%s Got string event for unknown string tdsm: %d\n",
+ __func__, tracer_event->string_event.tmsn);
+ return -1;
+ }
+ cur_string->last_param_num += 1;
+ if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
+ pr_debug("%s Number of params exceeds the max (%d)\n",
+ __func__, TRACER_MAX_PARAMS);
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ return 0;
+ }
+ /* keep the new parameter */
+ cur_string->params[cur_string->last_param_num - 1] =
+ tracer_event->string_event.string_param;
+ if (cur_string->last_param_num == cur_string->num_of_params)
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ }
+
+ return 0;
+}
+
+static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_timestamp_event timestamp_event =
+ tracer_event->timestamp_event;
+ struct tracer_string_format *str_frmt, *tmp_str;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u64 trace_timestamp;
+
+ list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, list) {
+ list_del(&str_frmt->list);
+ if (str_frmt->timestamp < (timestamp_event.timestamp & MASK_6_0))
+ trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
+ (str_frmt->timestamp & MASK_6_0);
+ else
+ trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
+ (str_frmt->timestamp & MASK_6_0);
+
+ mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
+ }
+}
+
+static int mlx5_tracer_handle_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ if (tracer_event->type == TRACER_EVENT_TYPE_STRING) {
+ mlx5_tracer_handle_string_trace(tracer, tracer_event);
+ } else if (tracer_event->type == TRACER_EVENT_TYPE_TIMESTAMP) {
+ if (!tracer_event->timestamp_event.unreliable)
+ mlx5_tracer_handle_timestamp_trace(tracer, tracer_event);
+ } else {
+ pr_debug("%s Got unrecognised type %d for parsing, exiting..\n",
+ __func__, tracer_event->type);
+ }
+ return 0;
+}
+
+static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer =
+ container_of(work, struct mlx5_fw_tracer, handle_traces_work);
+ u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK];
+ u32 block_count, start_offset, prev_start_offset, prev_consumer_index;
+ u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct tracer_event tracer_event;
+ int i;
+
+ mlx5_core_dbg(dev, "FWTracer: Handle Trace event, owner=(%d)\n", tracer->owner);
+ if (!tracer->owner)
+ return;
+
+ if (unlikely(!tracer->str_db.loaded))
+ goto arm;
+
+ block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
+ start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+ /* Copy the block to local buffer to avoid HW override while being processed*/
+ memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+ TRACER_BLOCK_SIZE_BYTE);
+
+ block_timestamp =
+ get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+
+ while (block_timestamp > tracer->last_timestamp) {
+ /* Check block override if its not the first block */
+ if (!tracer->last_timestamp) {
+ u64 *ts_event;
+ /* To avoid block override be the HW in case of buffer
+ * wraparound, the time stamp of the previous block
+ * should be compared to the last timestamp handled
+ * by the driver.
+ */
+ prev_consumer_index =
+ (tracer->buff.consumer_index - 1) & (block_count - 1);
+ prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+ ts_event = tracer->buff.log_buf + prev_start_offset +
+ (TRACES_PER_BLOCK - 1) * trace_event_size;
+ last_block_timestamp = get_block_timestamp(tracer, ts_event);
+ /* If previous timestamp different from last stored
+ * timestamp then there is a good chance that the
+ * current buffer is overwritten and therefore should
+ * not be parsed.
+ */
+ if (tracer->last_timestamp != last_block_timestamp) {
+ mlx5_core_warn(dev, "FWTracer: Events were lost\n");
+ tracer->last_timestamp = block_timestamp;
+ tracer->buff.consumer_index =
+ (tracer->buff.consumer_index + 1) & (block_count - 1);
+ break;
+ }
+ }
+
+ /* Parse events */
+ for (i = 0; i < TRACES_PER_BLOCK ; i++) {
+ poll_trace(tracer, &tracer_event, &tmp_trace_block[i]);
+ mlx5_tracer_handle_trace(tracer, &tracer_event);
+ }
+
+ tracer->buff.consumer_index =
+ (tracer->buff.consumer_index + 1) & (block_count - 1);
+
+ tracer->last_timestamp = block_timestamp;
+ start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+ memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+ TRACER_BLOCK_SIZE_BYTE);
+ block_timestamp = get_block_timestamp(tracer,
+ &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+ }
+
+arm:
+ mlx5_fw_tracer_arm(dev);
+}
+
+static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
+ MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
+ ilog2(TRACER_BUFFER_PAGE_NUM));
+ MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CONF, 0, 1);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+
+ return err;
+}
+
+static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS);
+ MLX5_SET(mtrc_ctrl, in, trace_status, status);
+ MLX5_SET(mtrc_ctrl, in, arm_event, arm);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CTRL, 0, 1);
+
+ if (!err && status)
+ tracer->last_timestamp = 0;
+
+ return err;
+}
+
+static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ int err;
+
+ err = mlx5_fw_tracer_ownership_acquire(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+ /* Don't fail since ownership can be acquired on a later FW event */
+ return 0;
+ }
+
+ err = mlx5_fw_tracer_set_mtrc_conf(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err);
+ goto release_ownership;
+ }
+
+ /* enable tracer & trace events */
+ err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err);
+ goto release_ownership;
+ }
+
+ mlx5_core_dbg(dev, "FWTracer: Ownership granted and active\n");
+ return 0;
+
+release_ownership:
+ mlx5_fw_tracer_ownership_release(tracer);
+ return err;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer =
+ container_of(work, struct mlx5_fw_tracer, ownership_change_work);
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
+ if (tracer->owner) {
+ tracer->owner = false;
+ tracer->buff.consumer_index = 0;
+ return;
+ }
+
+ mlx5_fw_tracer_start(tracer);
+}
+
+/* Create software resources (Buffers, etc ..) */
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_tracer *tracer = NULL;
+ int err;
+
+ if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) {
+ mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n");
+ return NULL;
+ }
+
+ tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL);
+ if (!tracer)
+ return ERR_PTR(-ENOMEM);
+
+ tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer");
+ if (!tracer->work_queue) {
+ err = -ENOMEM;
+ goto free_tracer;
+ }
+
+ tracer->dev = dev;
+
+ INIT_LIST_HEAD(&tracer->ready_strings_list);
+ INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
+ INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db);
+ INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces);
+
+
+ err = mlx5_query_mtrc_caps(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+ goto destroy_workqueue;
+ }
+
+ err = mlx5_fw_tracer_create_log_buf(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Create log buffer failed %d\n", err);
+ goto destroy_workqueue;
+ }
+
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Allocate strings database failed %d\n", err);
+ goto free_log_buf;
+ }
+
+ mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
+
+ return tracer;
+
+free_log_buf:
+ mlx5_fw_tracer_destroy_log_buf(tracer);
+destroy_workqueue:
+ tracer->dev = NULL;
+ destroy_workqueue(tracer->work_queue);
+free_tracer:
+ kvfree(tracer);
+ return ERR_PTR(err);
+}
+
+/* Create HW resources + start tracer
+ * must be called before Async EQ is created
+ */
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ if (IS_ERR_OR_NULL(tracer))
+ return 0;
+
+ dev = tracer->dev;
+
+ if (!tracer->str_db.loaded)
+ queue_work(tracer->work_queue, &tracer->read_fw_strings_work);
+
+ err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err);
+ return err;
+ }
+
+ err = mlx5_fw_tracer_create_mkey(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to create mkey %d\n", err);
+ goto err_dealloc_pd;
+ }
+
+ mlx5_fw_tracer_start(tracer);
+
+ return 0;
+
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
+ return err;
+}
+
+/* Stop tracer + Cleanup HW resources
+ * must be called after Async EQ is destroyed
+ */
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
+{
+ if (IS_ERR_OR_NULL(tracer))
+ return;
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
+ tracer->owner);
+
+ cancel_work_sync(&tracer->ownership_change_work);
+ cancel_work_sync(&tracer->handle_traces_work);
+
+ if (tracer->owner)
+ mlx5_fw_tracer_ownership_release(tracer);
+
+ mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey);
+ mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
+}
+
+/* Free software resources (Buffers, etc ..) */
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
+{
+ if (IS_ERR_OR_NULL(tracer))
+ return;
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: Destroy\n");
+
+ cancel_work_sync(&tracer->read_fw_strings_work);
+ mlx5_fw_tracer_clean_ready_list(tracer);
+ mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_free_strings_db(tracer);
+ mlx5_fw_tracer_destroy_log_buf(tracer);
+ flush_workqueue(tracer->work_queue);
+ destroy_workqueue(tracer->work_queue);
+ kvfree(tracer);
+}
+
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+ struct mlx5_fw_tracer *tracer = dev->tracer;
+
+ if (!tracer)
+ return;
+
+ switch (eqe->sub_type) {
+ case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
+ queue_work(tracer->work_queue, &tracer->ownership_change_work);
+ break;
+ case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ break;
+ default:
+ mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
+ eqe->sub_type);
+ }
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
new file mode 100644
index 000000000..0347f2dd5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_TRACER_H__
+#define __LIB_TRACER_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define STRINGS_DB_SECTIONS_NUM 8
+#define STRINGS_DB_READ_SIZE_BYTES 256
+#define STRINGS_DB_LEFTOVER_SIZE_BYTES 64
+#define TRACER_BUFFER_PAGE_NUM 64
+#define TRACER_BUFFER_CHUNK 4096
+#define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK)
+
+#define TRACER_BLOCK_SIZE_BYTE 256
+#define TRACES_PER_BLOCK 32
+
+#define TRACER_MAX_PARAMS 7
+#define MESSAGE_HASH_BITS 6
+#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
+
+#define MASK_52_7 (0x1FFFFFFFFFFF80)
+#define MASK_6_0 (0x7F)
+
+struct mlx5_fw_tracer {
+ struct mlx5_core_dev *dev;
+ bool owner;
+ u8 trc_ver;
+ struct workqueue_struct *work_queue;
+ struct work_struct ownership_change_work;
+ struct work_struct read_fw_strings_work;
+
+ /* Strings DB */
+ struct {
+ u8 first_string_trace;
+ u8 num_string_trace;
+ u32 num_string_db;
+ u32 base_address_out[STRINGS_DB_SECTIONS_NUM];
+ u32 size_out[STRINGS_DB_SECTIONS_NUM];
+ void *buffer[STRINGS_DB_SECTIONS_NUM];
+ bool loaded;
+ } str_db;
+
+ /* Log Buffer */
+ struct {
+ u32 pdn;
+ void *log_buf;
+ dma_addr_t dma;
+ u32 size;
+ struct mlx5_core_mkey mkey;
+ u32 consumer_index;
+ } buff;
+
+ u64 last_timestamp;
+ struct work_struct handle_traces_work;
+ struct hlist_head hash[MESSAGE_HASH_SIZE];
+ struct list_head ready_strings_list;
+};
+
+struct tracer_string_format {
+ char *string;
+ int params[TRACER_MAX_PARAMS];
+ int num_of_params;
+ int last_param_num;
+ u8 event_id;
+ u32 tmsn;
+ struct hlist_node hlist;
+ struct list_head list;
+ u32 timestamp;
+ bool lost;
+};
+
+enum mlx5_fw_tracer_ownership_state {
+ MLX5_FW_TRACER_RELEASE_OWNERSHIP,
+ MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
+};
+
+enum tracer_ctrl_fields_select {
+ TRACE_STATUS = 1 << 0,
+};
+
+enum tracer_event_type {
+ TRACER_EVENT_TYPE_STRING,
+ TRACER_EVENT_TYPE_TIMESTAMP = 0xFF,
+ TRACER_EVENT_TYPE_UNRECOGNIZED,
+};
+
+enum tracing_mode {
+ TRACE_TO_MEMORY = 1 << 0,
+};
+
+struct tracer_timestamp_event {
+ u64 timestamp;
+ u8 unreliable;
+};
+
+struct tracer_string_event {
+ u32 timestamp;
+ u32 tmsn;
+ u32 tdsn;
+ u32 string_param;
+};
+
+struct tracer_event {
+ bool lost_event;
+ u32 type;
+ u8 event_id;
+ union {
+ struct tracer_string_event string_event;
+ struct tracer_timestamp_event timestamp_event;
+ };
+};
+
+struct mlx5_ifc_tracer_event_bits {
+ u8 lost[0x1];
+ u8 timestamp[0x7];
+ u8 event_id[0x8];
+ u8 event_data[0x30];
+};
+
+struct mlx5_ifc_tracer_string_event_bits {
+ u8 lost[0x1];
+ u8 timestamp[0x7];
+ u8 event_id[0x8];
+ u8 tmsn[0xd];
+ u8 tdsn[0x3];
+ u8 string_param[0x20];
+};
+
+struct mlx5_ifc_tracer_timestamp_event_bits {
+ u8 timestamp7_0[0x8];
+ u8 event_id[0x8];
+ u8 urts[0x3];
+ u8 timestamp52_40[0xd];
+ u8 timestamp39_8[0x20];
+};
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
new file mode 100644
index 000000000..83f90e9af
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(__LIB_TRACER_TRACEPOINT_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __LIB_TRACER_TRACEPOINT_H__
+
+#include <linux/tracepoint.h>
+#include "fw_tracer.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+/* Tracepoint for FWTracer messages: */
+TRACE_EVENT(mlx5_fw,
+ TP_PROTO(const struct mlx5_fw_tracer *tracer, u64 trace_timestamp,
+ bool lost, u8 event_id, const char *msg),
+
+ TP_ARGS(tracer, trace_timestamp, lost, event_id, msg),
+
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(&tracer->dev->pdev->dev))
+ __field(u64, trace_timestamp)
+ __field(bool, lost)
+ __field(u8, event_id)
+ __string(msg, msg)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name(&tracer->dev->pdev->dev));
+ __entry->trace_timestamp = trace_timestamp;
+ __entry->lost = lost;
+ __entry->event_id = event_id;
+ __assign_str(msg, msg);
+ ),
+
+ TP_printk("%s [0x%llx] %d [0x%x] %s",
+ __get_str(dev_name),
+ __entry->trace_timestamp,
+ __entry->lost, __entry->event_id,
+ __get_str(msg))
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ./diag
+#define TRACE_INCLUDE_FILE fw_tracer_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
new file mode 100644
index 000000000..d79e177f8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -0,0 +1,979 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_H__
+#define __MLX5_EN_H__
+
+#include <linux/if_vlan.h>
+#include <linux/etherdevice.h>
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/crash_dump.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rhashtable.h>
+#include <net/switchdev.h>
+#include <net/xdp.h>
+#include <linux/net_dim.h>
+#include "wq.h"
+#include "mlx5_core.h"
+#include "en_stats.h"
+#include "en/fs.h"
+
+extern const struct net_device_ops mlx5e_netdev_ops;
+struct page_pool;
+
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
+
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
+#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+
+#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
+#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
+
+#define MLX5E_MAX_PRIORITY 8
+#define MLX5E_MAX_DSCP 64
+#define MLX5E_MAX_NUM_TC 8
+
+#define MLX5_RX_HEADROOM NET_SKB_PAD
+#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
+ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
+#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
+ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
+#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
+#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+ (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+ MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
+
+#define MLX5_MPWRQ_LOG_WQE_SZ 18
+#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
+ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
+#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
+
+#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
+#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS)
+#define MLX5E_MAX_RQ_NUM_MTTS \
+ ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \
+ (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
+ (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
+ (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))
+
+#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
+#define MLX5E_LOG_MAX_RX_WQE_BULK \
+ (ilog2(PAGE_SIZE / roundup_pow_of_two(MLX5E_MIN_SKB_FRAG_SZ)))
+
+#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6
+#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK)
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \
+ MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
+
+#define MLX5E_RX_MAX_HEAD (256)
+
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
+#define MLX5E_DEFAULT_LRO_TIMEOUT 32
+#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
+
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2
+
+#define MLX5E_LOG_INDIR_RQT_SIZE 0x7
+#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE)
+#define MLX5E_MIN_NUM_CHANNELS 0x1
+#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1)
+#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
+#define MLX5E_TX_CQ_POLL_BUDGET 128
+#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
+
+#define MLX5E_UMR_WQE_INLINE_SZ \
+ (sizeof(struct mlx5e_umr_wqe) + \
+ ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
+ MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5E_UMR_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
+
+#define MLX5E_NUM_MAIN_GROUPS 9
+
+#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
+
+#define mlx5e_dbg(mlevel, priv, format, ...) \
+do { \
+ if (NETIF_MSG_##mlevel & (priv)->msglevel) \
+ netdev_warn(priv->netdev, format, \
+ ##__VA_ARGS__); \
+} while (0)
+
+
+static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
+{
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW,
+ wq_size / 2);
+ default:
+ return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES,
+ wq_size / 2);
+ }
+}
+
+static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
+{
+ return is_kdump_kernel() ?
+ MLX5E_MIN_NUM_CHANNELS :
+ min_t(int, mdev->priv.eq_table.num_comp_vectors,
+ MLX5E_MAX_NUM_CHANNELS);
+}
+
+struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[0];
+};
+
+struct mlx5e_rx_wqe_ll {
+ struct mlx5_wqe_srq_next_seg next;
+ struct mlx5_wqe_data_seg data[0];
+};
+
+struct mlx5e_rx_wqe_cyc {
+ struct mlx5_wqe_data_seg data[0];
+};
+
+struct mlx5e_umr_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_umr_ctrl_seg uctrl;
+ struct mlx5_mkey_seg mkc;
+ struct mlx5_mtt inline_mtts[0];
+};
+
+extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
+
+static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
+ "rx_cqe_moder",
+ "tx_cqe_moder",
+ "rx_cqe_compress",
+ "rx_striding_rq",
+ "rx_no_csum_complete",
+};
+
+enum mlx5e_priv_flag {
+ MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
+ MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
+ MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
+ MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4),
+};
+
+#define MLX5E_SET_PFLAG(params, pflag, enable) \
+ do { \
+ if (enable) \
+ (params)->pflags |= (pflag); \
+ else \
+ (params)->pflags &= ~(pflag); \
+ } while (0)
+
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
+#endif
+
+struct mlx5e_params {
+ u8 log_sq_size;
+ u8 rq_wq_type;
+ u8 log_rq_mtu_frames;
+ u16 num_channels;
+ u8 num_tc;
+ bool rx_cqe_compress_def;
+ struct net_dim_cq_moder rx_cq_moderation;
+ struct net_dim_cq_moder tx_cq_moderation;
+ bool lro_en;
+ u32 lro_wqe_sz;
+ u8 tx_min_inline_mode;
+ u8 rss_hfunc;
+ u8 toeplitz_hash_key[40];
+ u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
+ bool vlan_strip_disable;
+ bool scatter_fcs_en;
+ bool rx_dim_enabled;
+ bool tx_dim_enabled;
+ u32 lro_timeout;
+ u32 pflags;
+ struct bpf_prog *xdp_prog;
+ unsigned int sw_mtu;
+ int hard_mtu;
+};
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+struct mlx5e_cee_config {
+ /* bw pct for priority group */
+ u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
+ u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+ bool pfc_setting[CEE_DCBX_MAX_PRIO];
+ bool pfc_enable;
+};
+
+enum {
+ MLX5_DCB_CHG_RESET,
+ MLX5_DCB_NO_CHG,
+ MLX5_DCB_CHG_NO_RESET,
+};
+
+struct mlx5e_dcbx {
+ enum mlx5_dcbx_oper_mode mode;
+ struct mlx5e_cee_config cee_cfg; /* pending configuration */
+ u8 dscp_app_cnt;
+
+ /* The only setting that cannot be read from FW */
+ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
+ u8 cap;
+
+ /* Buffer configuration */
+ bool manual_buffer;
+ u32 cable_len;
+ u32 xoff;
+};
+
+struct mlx5e_dcbx_dp {
+ u8 dscp2prio[MLX5E_MAX_DSCP];
+ u8 trust_state;
+};
+#endif
+
+enum {
+ MLX5E_RQ_STATE_ENABLED,
+ MLX5E_RQ_STATE_AM,
+ MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
+};
+
+struct mlx5e_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per napi poll */
+ u16 event_ctr;
+ struct napi_struct *napi;
+ struct mlx5_core_cq mcq;
+ struct mlx5e_channel *channel;
+
+ /* cqe decompression */
+ struct mlx5_cqe64 title;
+ struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
+ u8 mini_arr_idx;
+ u16 decmprs_left;
+ u16 decmprs_wqe_counter;
+
+ /* control */
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_ctrl wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_tx_wqe_info {
+ struct sk_buff *skb;
+ u32 num_bytes;
+ u8 num_wqebbs;
+ u8 num_dma;
+};
+
+enum mlx5e_dma_map_type {
+ MLX5E_DMA_MAP_SINGLE,
+ MLX5E_DMA_MAP_PAGE
+};
+
+struct mlx5e_sq_dma {
+ dma_addr_t addr;
+ u32 size;
+ enum mlx5e_dma_map_type type;
+};
+
+enum {
+ MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_RECOVERING,
+ MLX5E_SQ_STATE_IPSEC,
+ MLX5E_SQ_STATE_AM,
+ MLX5E_SQ_STATE_TLS,
+ MLX5E_SQ_STATE_REDIRECT,
+};
+
+struct mlx5e_sq_wqe_info {
+ u8 opcode;
+};
+
+struct mlx5e_txqsq {
+ /* data path */
+
+ /* dirtied @completion */
+ u16 cc;
+ u32 dma_fifo_cc;
+ struct net_dim dim; /* Adaptive Moderation */
+
+ /* dirtied @xmit */
+ u16 pc ____cacheline_aligned_in_smp;
+ u32 dma_fifo_pc;
+
+ struct mlx5e_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ u32 dma_fifo_mask;
+ struct mlx5e_sq_stats *stats;
+ struct {
+ struct mlx5e_sq_dma *dma_fifo;
+ struct mlx5e_tx_wqe_info *wqe_info;
+ } db;
+ void __iomem *uar_map;
+ struct netdev_queue *txq;
+ u32 sqn;
+ u8 min_inline_mode;
+ struct device *pdev;
+ __be32 mkey_be;
+ unsigned long state;
+ struct hwtstamp_config *tstamp;
+ struct mlx5_clock *clock;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+ int txq_ix;
+ u32 rate_limit;
+ struct mlx5e_txqsq_recover {
+ struct work_struct recover_work;
+ u64 last_recover;
+ } recover;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_dma_info {
+ struct page *page;
+ dma_addr_t addr;
+};
+
+struct mlx5e_xdp_info {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ struct mlx5e_dma_info di;
+};
+
+struct mlx5e_xdpsq {
+ /* data path */
+
+ /* dirtied @completion */
+ u16 cc;
+ bool redirect_flush;
+
+ /* dirtied @xmit */
+ u16 pc ____cacheline_aligned_in_smp;
+ bool doorbell;
+
+ struct mlx5e_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ struct mlx5e_xdpsq_stats *stats;
+ struct {
+ struct mlx5e_xdp_info *xdpi;
+ } db;
+ void __iomem *uar_map;
+ u32 sqn;
+ struct device *pdev;
+ __be32 mkey_be;
+ u8 min_inline_mode;
+ unsigned long state;
+ unsigned int hw_mtu;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_icosq {
+ /* data path */
+
+ /* dirtied @xmit */
+ u16 pc ____cacheline_aligned_in_smp;
+
+ struct mlx5e_cq cq;
+
+ /* write@xmit, read@completion */
+ struct {
+ struct mlx5e_sq_wqe_info *ico_wqe;
+ } db;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ void __iomem *uar_map;
+ u32 sqn;
+ unsigned long state;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+} ____cacheline_aligned_in_smp;
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+ return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
+
+struct mlx5e_wqe_frag_info {
+ struct mlx5e_dma_info *di;
+ u32 offset;
+ bool last_in_page;
+};
+
+struct mlx5e_umr_dma_info {
+ struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
+};
+
+struct mlx5e_mpw_info {
+ struct mlx5e_umr_dma_info umr;
+ u16 consumed_strides;
+ DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+};
+
+#define MLX5E_MAX_RX_FRAGS 4
+
+/* a single cache unit is capable to serve one napi call (for non-striding rq)
+ * or a MPWQE (for striding rq).
+ */
+#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
+ MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
+#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
+struct mlx5e_page_cache {
+ u32 head;
+ u32 tail;
+ struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
+};
+
+struct mlx5e_rq;
+typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
+
+enum mlx5e_rq_flag {
+ MLX5E_RQ_FLAG_XDP_XMIT = BIT(0),
+};
+
+struct mlx5e_rq_frag_info {
+ int frag_size;
+ int frag_stride;
+};
+
+struct mlx5e_rq_frags_info {
+ struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS];
+ u8 num_frags;
+ u8 log_num_frags;
+ u8 wqe_bulk;
+};
+
+struct mlx5e_rq {
+ /* data path */
+ union {
+ struct {
+ struct mlx5_wq_cyc wq;
+ struct mlx5e_wqe_frag_info *frags;
+ struct mlx5e_dma_info *di;
+ struct mlx5e_rq_frags_info info;
+ mlx5e_fp_skb_from_cqe skb_from_cqe;
+ } wqe;
+ struct {
+ struct mlx5_wq_ll wq;
+ struct mlx5e_umr_wqe umr_wqe;
+ struct mlx5e_mpw_info *info;
+ mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
+ u16 num_strides;
+ u8 log_stride_sz;
+ bool umr_in_progress;
+ } mpwqe;
+ };
+ struct {
+ u16 headroom;
+ u8 map_dir; /* dma map direction */
+ } buff;
+
+ struct mlx5e_channel *channel;
+ struct device *pdev;
+ struct net_device *netdev;
+ struct mlx5e_rq_stats *stats;
+ struct mlx5e_cq cq;
+ struct mlx5e_page_cache page_cache;
+ struct hwtstamp_config *tstamp;
+ struct mlx5_clock *clock;
+
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe;
+ mlx5e_fp_post_rx_wqes post_wqes;
+ mlx5e_fp_dealloc_wqe dealloc_wqe;
+
+ unsigned long state;
+ int ix;
+ unsigned int hw_mtu;
+
+ struct net_dim dim; /* Dynamic Interrupt Moderation */
+
+ /* XDP */
+ struct bpf_prog *xdp_prog;
+ struct mlx5e_xdpsq xdpsq;
+ DECLARE_BITMAP(flags, 8);
+ struct page_pool *page_pool;
+
+ /* control */
+ struct mlx5_wq_ctrl wq_ctrl;
+ __be32 mkey_be;
+ u8 wq_type;
+ u32 rqn;
+ struct mlx5_core_dev *mdev;
+ struct mlx5_core_mkey umr_mkey;
+
+ /* XDP read-mostly */
+ struct xdp_rxq_info xdp_rxq;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_channel {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_icosq icosq; /* internal control operations */
+ bool xdp;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+ u8 num_tc;
+
+ /* XDP_REDIRECT */
+ struct mlx5e_xdpsq xdpsq;
+
+ /* data path - accessed per napi poll */
+ struct irq_desc *irq_desc;
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ int ix;
+ int cpu;
+};
+
+struct mlx5e_channels {
+ struct mlx5e_channel **c;
+ unsigned int num;
+ struct mlx5e_params params;
+};
+
+struct mlx5e_channel_stats {
+ struct mlx5e_ch_stats ch;
+ struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq_stats rq;
+ struct mlx5e_xdpsq_stats rq_xdpsq;
+ struct mlx5e_xdpsq_stats xdpsq;
+} ____cacheline_aligned_in_smp;
+
+enum {
+ MLX5E_STATE_ASYNC_EVENTS_ENABLED,
+ MLX5E_STATE_OPENED,
+ MLX5E_STATE_DESTROYING,
+ MLX5E_STATE_XDP_TX_ENABLED,
+};
+
+struct mlx5e_rqt {
+ u32 rqtn;
+ bool enabled;
+};
+
+struct mlx5e_tir {
+ u32 tirn;
+ struct mlx5e_rqt rqt;
+ struct list_head list;
+};
+
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
+};
+
+struct mlx5e_priv {
+ /* priv data path fields - start */
+ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
+ int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ struct mlx5e_dcbx_dp dcbx_dp;
+#endif
+ /* priv data path fields - end */
+
+ u32 msglevel;
+ unsigned long state;
+ struct mutex state_lock; /* Protects Interface state */
+ struct mlx5e_rq drop_rq;
+
+ struct mlx5e_channels channels;
+ u32 tisn[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rqt indir_rqt;
+ struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ u32 tx_rates[MLX5E_MAX_NUM_SQS];
+
+ struct mlx5e_flow_steering fs;
+
+ struct workqueue_struct *wq;
+ struct work_struct update_carrier_work;
+ struct work_struct set_rx_mode_work;
+ struct work_struct tx_timeout_work;
+ struct delayed_work update_stats_work;
+
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_stats stats;
+ struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
+ u8 max_opened_tc;
+ struct hwtstamp_config tstamp;
+ u16 q_counter;
+ u16 drop_rq_q_counter;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ struct mlx5e_dcbx dcbx;
+#endif
+
+ const struct mlx5e_profile *profile;
+ void *ppriv;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_ipsec *ipsec;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_tls *tls;
+#endif
+};
+
+struct mlx5e_profile {
+ void (*init)(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile, void *ppriv);
+ void (*cleanup)(struct mlx5e_priv *priv);
+ int (*init_rx)(struct mlx5e_priv *priv);
+ void (*cleanup_rx)(struct mlx5e_priv *priv);
+ int (*init_tx)(struct mlx5e_priv *priv);
+ void (*cleanup_tx)(struct mlx5e_priv *priv);
+ void (*enable)(struct mlx5e_priv *priv);
+ void (*disable)(struct mlx5e_priv *priv);
+ void (*update_stats)(struct mlx5e_priv *priv);
+ void (*update_carrier)(struct mlx5e_priv *priv);
+ int (*max_nch)(struct mlx5_core_dev *mdev);
+ struct {
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe;
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
+ } rx_handlers;
+ int max_tc;
+};
+
+void mlx5e_build_ptys2ethtool_map(void);
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev,
+ select_queue_fallback_t fallback);
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe *wqe, u16 pi);
+
+void mlx5e_completion_event(struct mlx5_core_cq *mcq);
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
+int mlx5e_napi_poll(struct napi_struct *napi, int budget);
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
+
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+ bool recycle);
+void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
+bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+struct sk_buff *
+mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+struct sk_buff *
+mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+
+void mlx5e_update_stats(struct mlx5e_priv *priv);
+
+void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
+int mlx5e_self_test_num(struct mlx5e_priv *priv);
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf);
+void mlx5e_set_rx_mode_work(struct work_struct *work);
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val);
+
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
+ u16 vid);
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
+ u16 vid);
+void mlx5e_timestamp_init(struct mlx5e_priv *priv);
+
+struct mlx5e_redirect_rqt_param {
+ bool is_rss;
+ union {
+ u32 rqn; /* Direct RQN (Non-RSS) */
+ struct {
+ u8 hfunc;
+ struct mlx5e_channels *channels;
+ } rss; /* RSS data */
+ };
+};
+
+int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
+ struct mlx5e_redirect_rqt_param rrp);
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
+ enum mlx5e_traffic_types tt,
+ void *tirc, bool inner);
+
+int mlx5e_open_locked(struct net_device *netdev);
+int mlx5e_close_locked(struct net_device *netdev);
+
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs);
+void mlx5e_close_channels(struct mlx5e_channels *chs);
+
+/* Function pointer to be used to modify WH settings while
+ * switching channels
+ */
+typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
+void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_hw_modify hw_modify);
+void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
+void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
+
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+ int num_channels);
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
+ u8 cq_period_mode);
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
+ u8 cq_period_mode);
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+
+static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
+}
+
+static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
+
+static inline
+struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline
+void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc,
+ void __iomem *uar_map,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ *wq->db = cpu_to_be32(pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, uar_map, NULL);
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+extern const struct ethtool_ops mlx5e_ethtool_ops;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
+int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
+#endif
+
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir, u32 *in, int inlen);
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
+
+/* common netdev helpers */
+int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
+
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
+
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
+ u32 underlay_qpn, u32 *tisn);
+void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
+
+int mlx5e_create_tises(struct mlx5e_priv *priv);
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+int mlx5e_close(struct net_device *netdev);
+int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_stats_work(struct work_struct *work);
+
+int mlx5e_bits_invert(unsigned long a, int size);
+
+typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
+int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+ change_hw_mtu_cb set_mtu_cb);
+
+/* ethtool helpers */
+void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
+ struct ethtool_drvinfo *drvinfo);
+void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv,
+ uint32_t stringset, uint8_t *data);
+int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset);
+void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
+ struct ethtool_stats *stats, u64 *data);
+void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param);
+int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param);
+void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch);
+int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch);
+int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal);
+int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal);
+int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
+ struct ethtool_ts_info *info);
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash);
+
+/* mlx5e generic netdev management API */
+struct net_device*
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
+ void *ppriv);
+int mlx5e_attach_netdev(struct mlx5e_priv *priv);
+void mlx5e_detach_netdev(struct mlx5e_priv *priv);
+void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
+void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 max_channels, u16 mtu);
+u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
+void mlx5e_rx_dim_work(struct work_struct *work);
+void mlx5e_tx_dim_work(struct work_struct *work);
+#endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
new file mode 100644
index 000000000..1431232c9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5E_FLOW_STEER_H__
+#define __MLX5E_FLOW_STEER_H__
+
+enum {
+ MLX5E_TC_FT_LEVEL = 0,
+ MLX5E_TC_TTC_FT_LEVEL,
+};
+
+struct mlx5e_tc_table {
+ struct mlx5_flow_table *t;
+
+ struct rhashtable ht;
+
+ DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+ DECLARE_HASHTABLE(hairpin_tbl, 8);
+
+ struct notifier_block netdevice_nb;
+};
+
+struct mlx5e_flow_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+};
+
+struct mlx5e_l2_rule {
+ u8 addr[ETH_ALEN + 2];
+ struct mlx5_flow_handle *rule;
+};
+
+#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct mlx5e_vlan_table {
+ struct mlx5e_flow_table ft;
+ DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
+ DECLARE_BITMAP(active_svlans, VLAN_N_VID);
+ struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *untagged_rule;
+ struct mlx5_flow_handle *any_cvlan_rule;
+ struct mlx5_flow_handle *any_svlan_rule;
+ bool cvlan_filter_disabled;
+};
+
+struct mlx5e_l2_table {
+ struct mlx5e_flow_table ft;
+ struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct mlx5e_l2_rule broadcast;
+ struct mlx5e_l2_rule allmulti;
+ struct mlx5e_l2_rule promisc;
+ bool broadcast_enabled;
+ bool allmulti_enabled;
+ bool promisc_enabled;
+};
+
+enum mlx5e_traffic_types {
+ MLX5E_TT_IPV4_TCP,
+ MLX5E_TT_IPV6_TCP,
+ MLX5E_TT_IPV4_UDP,
+ MLX5E_TT_IPV6_UDP,
+ MLX5E_TT_IPV4_IPSEC_AH,
+ MLX5E_TT_IPV6_IPSEC_AH,
+ MLX5E_TT_IPV4_IPSEC_ESP,
+ MLX5E_TT_IPV6_IPSEC_ESP,
+ MLX5E_TT_IPV4,
+ MLX5E_TT_IPV6,
+ MLX5E_TT_ANY,
+ MLX5E_NUM_TT,
+ MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
+};
+
+enum mlx5e_tunnel_types {
+ MLX5E_TT_IPV4_GRE,
+ MLX5E_TT_IPV6_GRE,
+ MLX5E_NUM_TUNNEL_TT,
+};
+
+/* L3/L4 traffic type classifier */
+struct mlx5e_ttc_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *rules[MLX5E_NUM_TT];
+ struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
+};
+
+/* NIC prio FTS */
+enum {
+ MLX5E_VLAN_FT_LEVEL = 0,
+ MLX5E_L2_FT_LEVEL,
+ MLX5E_TTC_FT_LEVEL,
+ MLX5E_INNER_TTC_FT_LEVEL,
+#ifdef CONFIG_MLX5_EN_ARFS
+ MLX5E_ARFS_FT_LEVEL
+#endif
+};
+
+#ifdef CONFIG_MLX5_EN_RXNFC
+
+struct mlx5e_ethtool_table {
+ struct mlx5_flow_table *ft;
+ int num_rules;
+};
+
+#define ETHTOOL_NUM_L3_L4_FTS 7
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+ struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
+ struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
+ struct list_head rules;
+ int tot_num_rules;
+};
+
+void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
+int mlx5e_get_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
+#endif /* CONFIG_MLX5_EN_RXNFC */
+
+#ifdef CONFIG_MLX5_EN_ARFS
+#define ARFS_HASH_SHIFT BITS_PER_BYTE
+#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct arfs_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *default_rule;
+ struct hlist_head rules_hash[ARFS_HASH_SIZE];
+};
+
+enum arfs_type {
+ ARFS_IPV4_TCP,
+ ARFS_IPV6_TCP,
+ ARFS_IPV4_UDP,
+ ARFS_IPV6_UDP,
+ ARFS_NUM_TYPES,
+};
+
+struct mlx5e_arfs_tables {
+ struct arfs_table arfs_tables[ARFS_NUM_TYPES];
+ /* Protect aRFS rules list */
+ spinlock_t arfs_lock;
+ struct list_head rules;
+ int last_filter_id;
+ struct workqueue_struct *wq;
+};
+
+int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
+void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
+int mlx5e_arfs_enable(struct mlx5e_priv *priv);
+int mlx5e_arfs_disable(struct mlx5e_priv *priv);
+int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id);
+#else
+static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+#endif
+
+struct mlx5e_flow_steering {
+ struct mlx5_flow_namespace *ns;
+#ifdef CONFIG_MLX5_EN_RXNFC
+ struct mlx5e_ethtool_steering ethtool;
+#endif
+ struct mlx5e_tc_table tc;
+ struct mlx5e_vlan_table vlan;
+ struct mlx5e_l2_table l2;
+ struct mlx5e_ttc_table ttc;
+ struct mlx5e_ttc_table inner_ttc;
+#ifdef CONFIG_MLX5_EN_ARFS
+ struct mlx5e_arfs_tables arfs;
+#endif
+};
+
+struct ttc_params {
+ struct mlx5_flow_table_attr ft_attr;
+ u32 any_tt_tirn;
+ u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_ttc_table *inner_ttc;
+};
+
+void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
+void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
+void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params);
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc);
+
+int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc);
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
+void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
+void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
+
+#endif /* __MLX5E_FLOW_STEER_H__ */
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
new file mode 100644
index 000000000..12e1682f9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "port.h"
+
+/* speed in units of 1Mb */
+static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
+ [MLX5E_1000BASE_CX_SGMII] = 1000,
+ [MLX5E_1000BASE_KX] = 1000,
+ [MLX5E_10GBASE_CX4] = 10000,
+ [MLX5E_10GBASE_KX4] = 10000,
+ [MLX5E_10GBASE_KR] = 10000,
+ [MLX5E_20GBASE_KR2] = 20000,
+ [MLX5E_40GBASE_CR4] = 40000,
+ [MLX5E_40GBASE_KR4] = 40000,
+ [MLX5E_56GBASE_R4] = 56000,
+ [MLX5E_10GBASE_CR] = 10000,
+ [MLX5E_10GBASE_SR] = 10000,
+ [MLX5E_10GBASE_ER] = 10000,
+ [MLX5E_40GBASE_SR4] = 40000,
+ [MLX5E_40GBASE_LR4] = 40000,
+ [MLX5E_50GBASE_SR2] = 50000,
+ [MLX5E_100GBASE_CR4] = 100000,
+ [MLX5E_100GBASE_SR4] = 100000,
+ [MLX5E_100GBASE_KR4] = 100000,
+ [MLX5E_100GBASE_LR4] = 100000,
+ [MLX5E_100BASE_TX] = 100,
+ [MLX5E_1000BASE_T] = 1000,
+ [MLX5E_10GBASE_T] = 10000,
+ [MLX5E_25GBASE_CR] = 25000,
+ [MLX5E_25GBASE_KR] = 25000,
+ [MLX5E_25GBASE_SR] = 25000,
+ [MLX5E_50GBASE_CR2] = 50000,
+ [MLX5E_50GBASE_KR2] = 50000,
+};
+
+u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
+{
+ unsigned long temp = eth_proto_oper;
+ u32 speed = 0;
+ int i;
+
+ i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER);
+ if (i < MLX5E_LINK_MODES_NUMBER)
+ speed = mlx5e_link_speed[i];
+
+ return speed;
+}
+
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
+ u32 eth_proto_oper;
+ int err;
+
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+ if (err)
+ return err;
+
+ eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+ *speed = mlx5e_port_ptys2speed(eth_proto_oper);
+ if (!(*speed))
+ err = -EINVAL;
+
+ return err;
+}
+
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ u32 max_speed = 0;
+ u32 proto_cap;
+ int err;
+ int i;
+
+ err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
+ if (proto_cap & MLX5E_PROT_MASK(i))
+ max_speed = max(max_speed, mlx5e_link_speed[i]);
+
+ *speed = max_speed;
+ return 0;
+}
+
+u32 mlx5e_port_speed2linkmodes(u32 speed)
+{
+ u32 link_modes = 0;
+ int i;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (mlx5e_link_speed[i] == speed)
+ link_modes |= MLX5E_PROT_MASK(i);
+ }
+
+ return link_modes;
+}
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *in;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0);
+
+ kfree(in);
+ return err;
+}
+
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *out;
+ int err;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1);
+
+ kfree(out);
+ return err;
+}
+
+/* buffer[i]: buffer that priority i mapped to */
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
+ prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff);
+ for (prio = 0; prio < 8; prio++) {
+ buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF;
+ mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]);
+ }
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* First query the pptb register */
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
+ memcpy(in, out, sz);
+ MLX5_SET(pptb_reg, in, local_port, 1);
+
+ /* Update the pm and prio_x_buff */
+ MLX5_SET(pptb_reg, in, pm, 0xFF);
+
+ prio_x_buff = 0;
+ for (prio = 0; prio < 8; prio++)
+ prio_x_buff |= (buffer[prio] << (4 * prio));
+ MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff);
+
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
new file mode 100644
index 000000000..f8cbd8194
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_EN_PORT_H
+#define __MLX5E_EN_PORT_H
+
+#include <linux/mlx5/driver.h>
+#include "en.h"
+
+u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+u32 mlx5e_port_speed2linkmodes(u32 speed);
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
new file mode 100644
index 000000000..28d56e44e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "port_buffer.h"
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ u32 total_used = 0;
+ void *buffer;
+ void *out;
+ int err;
+ int i;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, out);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
+ port_buffer->buffer[i].lossy =
+ MLX5_GET(bufferx_reg, buffer, lossy);
+ port_buffer->buffer[i].epsb =
+ MLX5_GET(bufferx_reg, buffer, epsb);
+ port_buffer->buffer[i].size =
+ MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT;
+ port_buffer->buffer[i].xon =
+ MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+ port_buffer->buffer[i].xoff =
+ MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+ total_used += port_buffer->buffer[i].size;
+
+ mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i,
+ port_buffer->buffer[i].size,
+ port_buffer->buffer[i].xon,
+ port_buffer->buffer[i].xoff,
+ port_buffer->buffer[i].epsb,
+ port_buffer->buffer[i].lossy);
+ }
+
+ port_buffer->port_buffer_size =
+ MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT;
+ port_buffer->spare_buffer_size =
+ port_buffer->port_buffer_size - total_used;
+
+ mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
+ port_buffer->port_buffer_size,
+ port_buffer->spare_buffer_size);
+out:
+ kfree(out);
+ return err;
+}
+
+static int port_set_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *buffer;
+ void *in;
+ int err;
+ int i;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, in);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
+
+ MLX5_SET(bufferx_reg, buffer, size,
+ port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT);
+ MLX5_SET(bufferx_reg, buffer, lossy,
+ port_buffer->buffer[i].lossy);
+ MLX5_SET(bufferx_reg, buffer, xoff_threshold,
+ port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT);
+ MLX5_SET(bufferx_reg, buffer, xon_threshold,
+ port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT);
+ }
+
+ err = mlx5e_port_set_pbmc(mdev, in);
+out:
+ kfree(in);
+ return err;
+}
+
+/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B])
+ * minimum speed value is 40Gbps
+ */
+static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
+{
+ u32 speed;
+ u32 xoff;
+ int err;
+
+ err = mlx5e_port_linkspeed(priv->mdev, &speed);
+ if (err)
+ speed = SPEED_40000;
+ speed = max_t(u32, speed, SPEED_40000);
+
+ xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
+
+ mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff);
+ return xoff;
+}
+
+static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
+ u32 xoff, unsigned int max_mtu)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ if (port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].xoff = 0;
+ port_buffer->buffer[i].xon = 0;
+ continue;
+ }
+
+ if (port_buffer->buffer[i].size <
+ (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) {
+ pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n",
+ i, port_buffer->buffer[i].size);
+ return -ENOMEM;
+ }
+
+ port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
+ port_buffer->buffer[i].xon =
+ port_buffer->buffer[i].xoff - max_mtu;
+ }
+
+ return 0;
+}
+
+/**
+ * update_buffer_lossy()
+ * max_mtu: netdev's max_mtu
+ * pfc_en: <input> current pfc configuration
+ * buffer: <input> current prio to buffer mapping
+ * xoff: <input> xoff value
+ * port_buffer: <output> port receive buffer configuration
+ * change: <output>
+ *
+ * Update buffer configuration based on pfc configuraiton and priority
+ * to buffer mapping.
+ * Buffer's lossy bit is changed to:
+ * lossless if there is at least one PFC enabled priority mapped to this buffer
+ * lossy if all priorities mapped to this buffer are PFC disabled
+ *
+ * Return:
+ * Return 0 if no error.
+ * Set change to true if buffer configuration is modified.
+ */
+static int update_buffer_lossy(unsigned int max_mtu,
+ u8 pfc_en, u8 *buffer, u32 xoff,
+ struct mlx5e_port_buffer *port_buffer,
+ bool *change)
+{
+ bool changed = false;
+ u8 lossy_count;
+ u8 prio_count;
+ u8 lossy;
+ int prio;
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ prio_count = 0;
+ lossy_count = 0;
+
+ for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) {
+ if (buffer[prio] != i)
+ continue;
+
+ prio_count++;
+ lossy_count += !(pfc_en & (1 << prio));
+ }
+
+ if (lossy_count == prio_count)
+ lossy = 1;
+ else /* lossy_count < prio_count */
+ lossy = 0;
+
+ if (lossy != port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].lossy = lossy;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+
+ *change = true;
+ }
+
+ return 0;
+}
+
+static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en)
+{
+ u32 g_rx_pause, g_tx_pause;
+ int err;
+
+ err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause);
+ if (err)
+ return err;
+
+ /* If global pause enabled, set all active buffers to lossless.
+ * Otherwise, check PFC setting.
+ */
+ if (g_rx_pause || g_tx_pause)
+ *pfc_en = 0xff;
+ else
+ err = mlx5_query_port_pfc(mdev, pfc_en, NULL);
+
+ return err;
+}
+
+#define MINIMUM_MAX_MTU 9216
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer)
+{
+ struct mlx5e_port_buffer port_buffer;
+ u32 xoff = calculate_xoff(priv, mtu);
+ bool update_prio2buffer = false;
+ u8 buffer[MLX5E_MAX_PRIORITY];
+ bool update_buffer = false;
+ unsigned int max_mtu;
+ u32 total_used = 0;
+ u8 curr_pfc_en;
+ int err;
+ int i;
+
+ mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+ max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU);
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PFC) {
+ err = mlx5e_port_query_priority2buffer(priv->mdev, buffer);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff,
+ &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
+ update_prio2buffer = true;
+ err = fill_pfc_en(priv->mdev, &curr_pfc_en);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer,
+ xoff, &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_SIZE) {
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
+ if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
+ mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
+ __func__, i);
+ return -EINVAL;
+ }
+
+ port_buffer.buffer[i].size = buffer_size[i];
+ total_used += buffer_size[i];
+ }
+
+ mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);
+
+ if (total_used > port_buffer.port_buffer_size)
+ return -EINVAL;
+
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+ }
+
+ /* Need to update buffer configuration if xoff value is changed */
+ if (!update_buffer && xoff != priv->dcbx.xoff) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+ }
+ priv->dcbx.xoff = xoff;
+
+ /* Apply the settings */
+ if (update_buffer) {
+ err = port_set_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+ }
+
+ if (update_prio2buffer)
+ err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
new file mode 100644
index 000000000..34f55b81a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_PORT_BUFFER_H__
+#define __MLX5_EN_PORT_BUFFER_H__
+
+#include "en.h"
+#include "port.h"
+
+#define MLX5E_MAX_BUFFER 8
+#define MLX5E_BUFFER_CELL_SHIFT 7
+#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
+
+#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
+ MLX5_CAP_PCAM_REG(mdev, pbmc) && \
+ MLX5_CAP_PCAM_REG(mdev, pptb))
+
+enum {
+ MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0),
+ MLX5E_PORT_BUFFER_PFC = BIT(1),
+ MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2),
+ MLX5E_PORT_BUFFER_SIZE = BIT(3),
+};
+
+struct mlx5e_bufferx_reg {
+ u8 lossy;
+ u8 epsb;
+ u32 size;
+ u32 xoff;
+ u32 xon;
+};
+
+struct mlx5e_port_buffer {
+ u32 port_buffer_size;
+ u32 spare_buffer_size;
+ struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER];
+};
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer);
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
new file mode 100644
index 000000000..599114ab7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bpf_trace.h>
+#include "en/xdp.h"
+
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+ int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+ /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+ * The condition checked in mlx5e_rx_is_linear_skb is:
+ * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+ * (Note that hw_mtu == sw_mtu + hard_mtu.)
+ * What is returned from this function is:
+ * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+ * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+ * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+ * because both PAGE_SIZE and S are already aligned. Any number greater
+ * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+ * so max_mtu is the maximum MTU allowed.
+ */
+
+ return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
+static inline bool
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
+ struct xdp_buff *xdp)
+{
+ struct mlx5e_xdp_info xdpi;
+
+ xdpi.xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpi.xdpf))
+ return false;
+ xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
+ dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, PCI_DMA_TODEVICE);
+ xdpi.di = *di;
+
+ return mlx5e_xmit_xdp_frame(sq, &xdpi);
+}
+
+/* returns true if packet was consumed by xdp */
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+ void *va, u16 *rx_headroom, u32 *len)
+{
+ struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+ struct xdp_buff xdp;
+ u32 act;
+ int err;
+
+ if (!prog)
+ return false;
+
+ xdp.data = va + *rx_headroom;
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + *len;
+ xdp.data_hard_start = va;
+ xdp.rxq = &rq->xdp_rxq;
+
+ act = bpf_prog_run_xdp(prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ *rx_headroom = xdp.data - xdp.data_hard_start;
+ *len = xdp.data_end - xdp.data;
+ return false;
+ case XDP_TX:
+ if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+ return true;
+ case XDP_REDIRECT:
+ /* When XDP enabled then page-refcnt==1 here */
+ err = xdp_do_redirect(rq->netdev, &xdp, prog);
+ if (unlikely(err))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
+ rq->xdpsq.redirect_flush = true;
+ mlx5e_page_dma_unmap(rq, di);
+ rq->stats->xdp_redirect++;
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+xdp_abort:
+ trace_xdp_exception(rq->netdev, prog, act);
+ /* fall through */
+ case XDP_DROP:
+ rq->stats->xdp_drop++;
+ return true;
+ }
+}
+
+bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg = wqe->data;
+
+ struct xdp_frame *xdpf = xdpi->xdpf;
+ dma_addr_t dma_addr = xdpi->dma_addr;
+ unsigned int dma_len = xdpf->len;
+
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ prefetchw(wqe);
+
+ if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
+ stats->err++;
+ return false;
+ }
+
+ if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
+ if (sq->doorbell) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->doorbell = false;
+ }
+ stats->full++;
+ return false;
+ }
+
+ cseg->fm_ce_se = 0;
+
+ /* copy the inline part if required */
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
+ eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+ dma_len -= MLX5E_XDP_MIN_INLINE;
+ dma_addr += MLX5E_XDP_MIN_INLINE;
+ dseg++;
+ }
+
+ /* write the dma part */
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+
+ /* move page to reference to sq responsibility,
+ * and mark so it's not put back in page-cache.
+ */
+ sq->db.xdpi[pi] = *xdpi;
+ sq->pc++;
+
+ sq->doorbell = true;
+
+ stats->xmit++;
+ return true;
+}
+
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+{
+ struct mlx5e_xdpsq *sq;
+ struct mlx5_cqe64 *cqe;
+ struct mlx5e_rq *rq;
+ bool is_redirect;
+ u16 sqcc;
+ int i;
+
+ sq = container_of(cq, struct mlx5e_xdpsq, cq);
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return false;
+
+ is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+ rq = container_of(sq, struct mlx5e_rq, xdpsq);
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ i = 0;
+ do {
+ u16 wqe_counter;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ do {
+ u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+
+ last_wqe = (sqcc == wqe_counter);
+ sqcc++;
+
+ if (is_redirect) {
+ dma_unmap_single(sq->pdev, xdpi->dma_addr,
+ xdpi->xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi->xdpf);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi->di, true);
+ }
+ } while (!last_wqe);
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ sq->stats->cqes += i;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->cc = sqcc;
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_rq *rq;
+ bool is_redirect;
+
+ is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+ rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq);
+
+ while (sq->cc != sq->pc) {
+ u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+
+ sq->cc++;
+
+ if (is_redirect) {
+ dma_unmap_single(sq->pdev, xdpi->dma_addr,
+ xdpi->xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi->xdpf);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi->di, false);
+ }
+ }
+}
+
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_xdpsq *sq;
+ int drops = 0;
+ int sq_num;
+ int i;
+
+ /* this flag is sufficient, no need to test internal sq state */
+ if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ sq_num = smp_processor_id();
+
+ if (unlikely(sq_num >= priv->channels.num))
+ return -ENXIO;
+
+ sq = &priv->channels.c[sq_num]->xdpsq;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct mlx5e_xdp_info xdpi;
+
+ xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ continue;
+ }
+
+ xdpi.xdpf = xdpf;
+
+ if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ mlx5e_xmit_xdp_doorbell(sq);
+
+ return n - drops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
new file mode 100644
index 000000000..827ceef5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_XDP_H__
+#define __MLX5_EN_XDP_H__
+
+#include "en.h"
+
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+#define MLX5E_XDP_TX_DS_COUNT \
+ ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+ void *va, u16 *rx_headroom, u32 *len);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
+
+bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+
+static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+ /* let other device's napi(s) see our new state */
+ synchronize_rcu();
+}
+
+static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
new file mode 100644
index 000000000..1dd225380
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_EN_ACCEL_H__
+#define __MLX5E_EN_ACCEL_H__
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls_rxtx.h"
+#include "en.h"
+
+static inline void
+mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
+{
+ int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
+
+ udp_hdr(skb)->len = htons(payload_len);
+}
+
+static inline struct sk_buff *
+mlx5e_accel_handle_tx(struct sk_buff *skb,
+ struct mlx5e_txqsq *sq,
+ struct net_device *dev,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+#ifdef CONFIG_MLX5_EN_TLS
+ if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
+ skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi);
+ if (unlikely(!skb))
+ return NULL;
+ }
+#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
+ skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb);
+ if (unlikely(!skb))
+ return NULL;
+ }
+#endif
+
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ mlx5e_udp_gso_handle_tx_skb(skb);
+
+ return skb;
+}
+
+#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
new file mode 100644
index 000000000..c467f5e98
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,547 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/aead.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+
+#include "en.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+
+
+static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa;
+
+ if (!x)
+ return NULL;
+
+ sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ if (!sa)
+ return NULL;
+
+ WARN_ON(sa->x != x);
+ return sa;
+}
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
+ unsigned int handle)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_state *ret = NULL;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle)
+ if (sa_entry->handle == handle) {
+ ret = sa_entry->x;
+ xfrm_state_hold(ret);
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ unsigned long flags;
+ int ret;
+
+ ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ sa_entry->handle = ret;
+ hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+
+ return 0;
+}
+
+static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ hash_del_rcu(&sa_entry->hlist);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+
+ /* xfrm already doing sync rcu between del and free callbacks */
+
+ ida_simple_remove(&ipsec->halloc, sa_entry->handle);
+}
+
+static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_replay_state_esn *replay_esn;
+ u32 seq_bottom;
+ u8 overlap;
+ u32 *esn;
+
+ if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+ sa_entry->esn_state.trigger = 0;
+ return false;
+ }
+
+ replay_esn = sa_entry->x->replay_esn;
+ seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+ overlap = sa_entry->esn_state.overlap;
+
+ sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+ htonl(seq_bottom));
+ esn = &sa_entry->esn_state.esn;
+
+ sa_entry->esn_state.trigger = 1;
+ if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+ ++(*esn);
+ sa_entry->esn_state.overlap = 0;
+ return true;
+ } else if (unlikely(!overlap &&
+ (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
+ sa_entry->esn_state.overlap = 1;
+ return true;
+ }
+
+ return false;
+}
+
+static void
+mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct xfrm_state *x = sa_entry->x;
+ struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
+ struct aead_geniv_ctx *geniv_ctx;
+ struct crypto_aead *aead;
+ unsigned int crypto_data_len, key_len;
+ int ivsize;
+
+ memset(attrs, 0, sizeof(*attrs));
+
+ /* key */
+ crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+ key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+
+ memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
+ aes_gcm->key_len = key_len * 8;
+
+ /* salt and seq_iv */
+ aead = x->data;
+ geniv_ctx = crypto_aead_ctx(aead);
+ ivsize = crypto_aead_ivsize(aead);
+ memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
+ memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
+ sizeof(aes_gcm->salt));
+
+ /* iv len */
+ aes_gcm->icv_len = x->aead->alg_icv_len;
+
+ /* esn */
+ if (sa_entry->esn_state.trigger) {
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ attrs->esn = sa_entry->esn_state.esn;
+ if (sa_entry->esn_state.overlap)
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ }
+
+ /* rx handle */
+ attrs->sa_handle = sa_entry->handle;
+
+ /* algo type */
+ attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ /* action */
+ attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+ MLX5_ACCEL_ESP_ACTION_DECRYPT;
+ /* flags */
+ attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL;
+}
+
+static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
+{
+ struct net_device *netdev = x->xso.dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+
+ if (x->props.aalgo != SADB_AALG_NONE) {
+ netdev_info(netdev, "Cannot offload authenticated xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+ netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->props.calgo != SADB_X_CALG_NONE) {
+ netdev_info(netdev, "Cannot offload compressed xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_ESN)) {
+ netdev_info(netdev, "Cannot offload ESN xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.family != AF_INET &&
+ x->props.family != AF_INET6) {
+ netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_TRANSPORT &&
+ x->props.mode != XFRM_MODE_TUNNEL) {
+ dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->id.proto != IPPROTO_ESP) {
+ netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->encap) {
+ netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n");
+ return -EINVAL;
+ }
+ if (!x->aead) {
+ netdev_info(netdev, "Cannot offload xfrm states without aead\n");
+ return -EINVAL;
+ }
+ if (x->aead->alg_icv_len != 128) {
+ netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+ return -EINVAL;
+ }
+ if ((x->aead->alg_key_len != 128 + 32) &&
+ (x->aead->alg_key_len != 256 + 32)) {
+ netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EINVAL;
+ }
+ if (x->tfcpad) {
+ netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n");
+ return -EINVAL;
+ }
+ if (!x->geniv) {
+ netdev_info(netdev, "Cannot offload xfrm states without geniv\n");
+ return -EINVAL;
+ }
+ if (strcmp(x->geniv, "seqiv")) {
+ netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
+ return -EINVAL;
+ }
+ if (x->props.family == AF_INET6 &&
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_IPV6)) {
+ netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int mlx5e_xfrm_add_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
+ struct net_device *netdev = x->xso.dev;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+ struct mlx5e_priv *priv;
+ __be32 saddr[4] = {0}, daddr[4] = {0}, spi;
+ bool is_ipv6 = false;
+ int err;
+
+ priv = netdev_priv(netdev);
+
+ err = mlx5e_xfrm_validate_state(x);
+ if (err)
+ return err;
+
+ sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ if (!sa_entry) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sa_entry->x = x;
+ sa_entry->ipsec = priv->ipsec;
+
+ /* Add the SA to handle processed incoming packets before the add SA
+ * completion was received
+ */
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ err = mlx5e_ipsec_sadb_rx_add(sa_entry);
+ if (err) {
+ netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
+ goto err_entry;
+ }
+ } else {
+ sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+ mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
+ }
+
+ /* check esn */
+ mlx5e_ipsec_update_esn_state(sa_entry);
+
+ /* create xfrm */
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
+ sa_entry->xfrm =
+ mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
+ MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
+ if (IS_ERR(sa_entry->xfrm)) {
+ err = PTR_ERR(sa_entry->xfrm);
+ goto err_sadb_rx;
+ }
+
+ /* create hw context */
+ if (x->props.family == AF_INET) {
+ saddr[3] = x->props.saddr.a4;
+ daddr[3] = x->id.daddr.a4;
+ } else {
+ memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
+ memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
+ is_ipv6 = true;
+ }
+ spi = x->id.spi;
+ sa_entry->hw_context =
+ mlx5_accel_esp_create_hw_context(priv->mdev,
+ sa_entry->xfrm,
+ saddr, daddr, spi,
+ is_ipv6);
+ if (IS_ERR(sa_entry->hw_context)) {
+ err = PTR_ERR(sa_entry->hw_context);
+ goto err_xfrm;
+ }
+
+ x->xso.offload_handle = (unsigned long)sa_entry;
+ goto out;
+
+err_xfrm:
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
+err_sadb_rx:
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ mlx5e_ipsec_sadb_rx_del(sa_entry);
+ mlx5e_ipsec_sadb_rx_free(sa_entry);
+ }
+err_entry:
+ kfree(sa_entry);
+out:
+ return err;
+}
+
+static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+
+ if (!sa_entry)
+ return;
+
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ mlx5e_ipsec_sadb_rx_del(sa_entry);
+}
+
+static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+
+ if (!sa_entry)
+ return;
+
+ if (sa_entry->hw_context) {
+ flush_workqueue(sa_entry->ipsec->wq);
+ mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
+ }
+
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ mlx5e_ipsec_sadb_rx_free(sa_entry);
+
+ kfree(sa_entry);
+}
+
+int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = NULL;
+
+ if (!MLX5_IPSEC_DEV(priv->mdev)) {
+ netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
+ return 0;
+ }
+
+ ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+ if (!ipsec)
+ return -ENOMEM;
+
+ hash_init(ipsec->sadb_rx);
+ spin_lock_init(&ipsec->sadb_rx_lock);
+ ida_init(&ipsec->halloc);
+ ipsec->en_priv = priv;
+ ipsec->en_priv->ipsec = ipsec;
+ ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+ ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
+ priv->netdev->name);
+ if (!ipsec->wq) {
+ kfree(ipsec);
+ return -ENOMEM;
+ }
+ netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
+ return 0;
+}
+
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+ if (!ipsec)
+ return;
+
+ drain_workqueue(ipsec->wq);
+ destroy_workqueue(ipsec->wq);
+
+ ida_destroy(&ipsec->halloc);
+ kfree(ipsec);
+ priv->ipsec = NULL;
+}
+
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+ if (x->props.family == AF_INET) {
+ /* Offload with IPv4 options is not supported yet */
+ if (ip_hdr(skb)->ihl > 5)
+ return false;
+ } else {
+ /* Offload with IPv6 extension headers is not support yet */
+ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+ return false;
+ }
+
+ return true;
+}
+
+struct mlx5e_ipsec_modify_state_work {
+ struct work_struct work;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+};
+
+static void _update_xfrm_state(struct work_struct *work)
+{
+ int ret;
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
+
+ ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
+ &modify_work->attrs);
+ if (ret)
+ netdev_warn(sa_entry->ipsec->en_priv->netdev,
+ "Not an IPSec offload device\n");
+
+ kfree(modify_work);
+}
+
+static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5e_ipsec_modify_state_work *modify_work;
+ bool need_update;
+
+ if (!sa_entry)
+ return;
+
+ need_update = mlx5e_ipsec_update_esn_state(sa_entry);
+ if (!need_update)
+ return;
+
+ modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC);
+ if (!modify_work)
+ return;
+
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
+ modify_work->sa_entry = sa_entry;
+
+ INIT_WORK(&modify_work->work, _update_xfrm_state);
+ WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work));
+}
+
+static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+ .xdo_dev_state_add = mlx5e_xfrm_add_state,
+ .xdo_dev_state_delete = mlx5e_xfrm_del_state,
+ .xdo_dev_state_free = mlx5e_xfrm_free_state,
+ .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
+};
+
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *netdev = priv->netdev;
+
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
+ !MLX5_CAP_ETH(mdev, swp)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+ return;
+ }
+
+ mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+ netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+ netdev->features |= NETIF_F_HW_ESP;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+ if (!MLX5_CAP_ETH(mdev, swp_csum)) {
+ mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
+ return;
+ }
+
+ netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
+ !MLX5_CAP_ETH(mdev, swp_lso)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+ return;
+ }
+
+ mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+ netdev->features |= NETIF_F_GSO_ESP;
+ netdev->hw_features |= NETIF_F_GSO_ESP;
+ netdev->hw_enc_features |= NETIF_F_GSO_ESP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
new file mode 100644
index 000000000..93bf10e65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_H__
+#define __MLX5E_IPSEC_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/mlx5/device.h>
+#include <net/xfrm.h>
+#include <linux/idr.h>
+
+#include "accel/ipsec.h"
+
+#define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+
+struct mlx5e_priv;
+
+struct mlx5e_ipsec_sw_stats {
+ atomic64_t ipsec_rx_drop_sp_alloc;
+ atomic64_t ipsec_rx_drop_sadb_miss;
+ atomic64_t ipsec_rx_drop_syndrome;
+ atomic64_t ipsec_tx_drop_bundle;
+ atomic64_t ipsec_tx_drop_no_state;
+ atomic64_t ipsec_tx_drop_not_ip;
+ atomic64_t ipsec_tx_drop_trailer;
+ atomic64_t ipsec_tx_drop_metadata;
+};
+
+struct mlx5e_ipsec_stats {
+ u64 ipsec_dec_in_packets;
+ u64 ipsec_dec_out_packets;
+ u64 ipsec_dec_bypass_packets;
+ u64 ipsec_enc_in_packets;
+ u64 ipsec_enc_out_packets;
+ u64 ipsec_enc_bypass_packets;
+ u64 ipsec_dec_drop_packets;
+ u64 ipsec_dec_auth_fail_packets;
+ u64 ipsec_enc_drop_packets;
+ u64 ipsec_add_sa_success;
+ u64 ipsec_add_sa_fail;
+ u64 ipsec_del_sa_success;
+ u64 ipsec_del_sa_fail;
+ u64 ipsec_cmd_drop;
+};
+
+struct mlx5e_ipsec {
+ struct mlx5e_priv *en_priv;
+ DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+ bool no_trailer;
+ spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
+ struct ida halloc;
+ struct mlx5e_ipsec_sw_stats sw_stats;
+ struct mlx5e_ipsec_stats stats;
+ struct workqueue_struct *wq;
+};
+
+struct mlx5e_ipsec_esn_state {
+ u32 esn;
+ u8 trigger: 1;
+ u8 overlap: 1;
+};
+
+struct mlx5e_ipsec_sa_entry {
+ struct hlist_node hlist; /* Item in SADB_RX hashtable */
+ struct mlx5e_ipsec_esn_state esn_state;
+ unsigned int handle; /* Handle in SADB_RX */
+ struct xfrm_state *x;
+ struct mlx5e_ipsec *ipsec;
+ struct mlx5_accel_esp_xfrm *xfrm;
+ void *hw_context;
+ void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+};
+
+void mlx5e_ipsec_build_inverse_table(void);
+int mlx5e_ipsec_init(struct mlx5e_priv *priv);
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
+
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
+ unsigned int handle);
+
+#else
+
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+}
+
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
+ uint8_t *data)
+{
+ return 0;
+}
+
+static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
new file mode 100644
index 000000000..128a82b1d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/aead.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/ipsec.h"
+#include "accel/accel.h"
+#include "en.h"
+
+enum {
+ MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
+ MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+ MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
+};
+
+struct mlx5e_ipsec_rx_metadata {
+ unsigned char nexthdr;
+ __be32 sa_handle;
+} __packed;
+
+enum {
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+struct mlx5e_ipsec_tx_metadata {
+ __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */
+ __be16 seq; /* LSBs of the first TCP seq, only for LSO */
+ u8 esp_next_proto; /* Next protocol of ESP */
+} __packed;
+
+struct mlx5e_ipsec_metadata {
+ unsigned char syndrome;
+ union {
+ unsigned char raw[5];
+ /* from FPGA to host, on successful decrypt */
+ struct mlx5e_ipsec_rx_metadata rx;
+ /* from host to FPGA */
+ struct mlx5e_ipsec_tx_metadata tx;
+ } __packed content;
+ /* packet type ID field */
+ __be16 ethertype;
+} __packed;
+
+#define MAX_LSO_MSS 2048
+
+/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
+static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
+
+static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
+{
+ return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
+}
+
+static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
+{
+ struct mlx5e_ipsec_metadata *mdata;
+ struct ethhdr *eth;
+
+ if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
+ return ERR_PTR(-ENOMEM);
+
+ eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
+ skb->mac_header -= sizeof(*mdata);
+ mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
+
+ memmove(skb->data, skb->data + sizeof(*mdata),
+ 2 * ETH_ALEN);
+
+ eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+
+ memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
+ return mdata;
+}
+
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+ unsigned int alen = crypto_aead_authsize(x->data);
+ struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+ struct iphdr *ipv4hdr = ip_hdr(skb);
+ unsigned int trailer_len;
+ u8 plen;
+ int ret;
+
+ ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+ if (unlikely(ret))
+ return ret;
+
+ trailer_len = alen + plen + 2;
+
+ pskb_trim(skb, skb->len - trailer_len);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+ ip_send_check(ipv4hdr);
+ } else {
+ ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+ trailer_len);
+ }
+ return 0;
+}
+
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, u8 mode,
+ struct xfrm_offload *xo)
+{
+ u8 proto;
+
+ /* Tunnel Mode:
+ * SWP: OutL3 InL3 InL4
+ * Pkt: MAC IP ESP IP L4
+ *
+ * Transport Mode:
+ * SWP: OutL3 InL4
+ * InL3
+ * Pkt: MAC IP ESP L4
+ *
+ * Offsets are in 2-byte words, counting from start of frame
+ */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (skb->protocol == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6) {
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ proto = inner_ipv6_hdr(skb)->nexthdr;
+ } else {
+ proto = inner_ip_hdr(skb)->protocol;
+ }
+ } else {
+ eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+ if (skb->protocol == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ proto = xo->proto;
+ }
+ switch (proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ /* Fall through */
+ case IPPROTO_TCP:
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ }
+}
+
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ __u32 oseq = replay_esn->oseq;
+ int iv_offset;
+ __be64 seqno;
+ u32 seq_hi;
+
+ if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID &&
+ MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) {
+ seq_hi = xo->seq.hi - 1;
+ } else {
+ seq_hi = xo->seq.hi;
+ }
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ int iv_offset;
+ __be64 seqno;
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
+ struct mlx5e_ipsec_metadata *mdata,
+ struct xfrm_offload *xo)
+{
+ struct ip_esp_hdr *esph;
+ struct tcphdr *tcph;
+
+ if (skb_is_gso(skb)) {
+ /* Add LSO metadata indication */
+ esph = ip_esp_hdr(skb);
+ tcph = inner_tcp_hdr(skb);
+ netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
+ skb->network_header,
+ skb->transport_header,
+ skb->inner_network_header,
+ skb->inner_transport_header);
+ netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
+ skb->len, skb_shinfo(skb)->gso_size,
+ ntohs(tcph->source), ntohs(tcph->dest),
+ ntohl(tcph->seq), ntohl(esph->seq_no));
+ mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
+ mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
+ mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
+ } else {
+ mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
+ }
+ mdata->content.tx.esp_next_proto = xo->proto;
+
+ netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
+ mdata->syndrome, mdata->content.tx.esp_next_proto,
+ ntohs(mdata->content.tx.mss_inv),
+ ntohs(mdata->content.tx.seq));
+}
+
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_tx_wqe *wqe,
+ struct sk_buff *skb)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct mlx5e_ipsec_metadata *mdata;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_state *x;
+
+ if (!xo)
+ return skb;
+
+ if (unlikely(skb->sp->len != 1)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
+ goto drop;
+ }
+
+ x = xfrm_input_state(skb);
+ if (unlikely(!x)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state);
+ goto drop;
+ }
+
+ if (unlikely(!x->xso.offload_handle ||
+ (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6)))) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
+ goto drop;
+ }
+
+ if (!skb_is_gso(skb))
+ if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
+ goto drop;
+ }
+ mdata = mlx5e_ipsec_add_metadata(skb);
+ if (IS_ERR(mdata)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+ goto drop;
+ }
+ mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+ sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ sa_entry->set_iv_op(skb, x, xo);
+ mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+ return skb;
+
+drop:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static inline struct xfrm_state *
+mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
+ struct mlx5e_ipsec_metadata *mdata)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct xfrm_offload *xo;
+ struct xfrm_state *xs;
+ u32 sa_handle;
+
+ skb->sp = secpath_dup(skb->sp);
+ if (unlikely(!skb->sp)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
+ return NULL;
+ }
+
+ sa_handle = be32_to_cpu(mdata->content.rx.sa_handle);
+ xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
+ if (unlikely(!xs)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
+ return NULL;
+ }
+
+ skb->sp->xvec[skb->sp->len++] = xs;
+ skb->sp->olen++;
+
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ switch (mdata->syndrome) {
+ case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
+ xo->status = CRYPTO_SUCCESS;
+ if (likely(priv->ipsec->no_trailer)) {
+ xo->flags |= XFRM_ESP_NO_TRAILER;
+ xo->proto = mdata->content.rx.nexthdr;
+ }
+ break;
+ case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
+ xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
+ break;
+ case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
+ xo->status = CRYPTO_INVALID_PROTOCOL;
+ break;
+ default:
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
+ return NULL;
+ }
+ return xs;
+}
+
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb, u32 *cqe_bcnt)
+{
+ struct mlx5e_ipsec_metadata *mdata;
+ struct xfrm_state *xs;
+
+ if (!is_metadata_hdr_valid(skb))
+ return skb;
+
+ /* Use the metadata */
+ mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN);
+ xs = mlx5e_ipsec_build_sp(netdev, skb, mdata);
+ if (unlikely(!xs)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ remove_metadata_hdr(skb);
+ *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
+
+ return skb;
+}
+
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct xfrm_state *x;
+
+ if (skb->sp && skb->sp->len) {
+ x = skb->sp->xvec[0];
+ if (x && x->xso.offload_handle)
+ return true;
+ }
+ return false;
+}
+
+void mlx5e_ipsec_build_inverse_table(void)
+{
+ u16 mss_inv;
+ u32 mss;
+
+ /* Calculate 1/x inverse table for use in GSO data path.
+ * Using this table, we provide the IPSec accelerator with the value of
+ * 1/gso_size so that it can infer the position of each segment inside
+ * the GSO, and increment the ESP sequence number, and generate the IV.
+ * The HW needs this value in Q0.16 fixed-point number format
+ */
+ mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
+ for (mss = 2; mss < MAX_LSO_MSS; mss++) {
+ mss_inv = div_u64(1ULL << 32, mss) >> 16;
+ mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
new file mode 100644
index 000000000..ca47c0540
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_RXTX_H__
+#define __MLX5E_IPSEC_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/skbuff.h>
+#include <net/xfrm.h>
+#include "en.h"
+
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb, u32 *cqe_bcnt);
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+void mlx5e_ipsec_inverse_table_init(void);
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+ netdev_features_t features);
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_tx_wqe *wqe,
+ struct sk_buff *skb);
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
new file mode 100644
index 000000000..6fea59223
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "fpga/sdk.h"
+#include "en_accel/ipsec.h"
+
+static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
+};
+
+static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
+#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
+
+#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS)
+
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+ if (!priv->ipsec)
+ return 0;
+
+ return NUM_IPSEC_COUNTERS;
+}
+
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ unsigned int i, idx = 0;
+
+ if (!priv->ipsec)
+ return 0;
+
+ for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_ipsec_hw_stats_desc[i].format);
+
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_ipsec_sw_stats_desc[i].format);
+
+ return NUM_IPSEC_COUNTERS;
+}
+
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+ int ret;
+
+ if (!priv->ipsec)
+ return;
+
+ ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
+ NUM_IPSEC_HW_COUNTERS);
+ if (ret)
+ memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
+}
+
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ int i, idx = 0;
+
+ if (!priv->ipsec)
+ return 0;
+
+ for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
+ mlx5e_ipsec_hw_stats_desc, i);
+
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+ mlx5e_ipsec_sw_stats_desc, i);
+
+ return NUM_IPSEC_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
new file mode 100644
index 000000000..e88340e19
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include "en_accel/tls.h"
+#include "accel/tls.h"
+
+static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ MLX5_SET(tls_flow, flow, ipv6, 0);
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ MLX5_SET(tls_flow, flow, ipv6, 1);
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+}
+#endif
+
+static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport,
+ MLX5_FLD_SZ_BYTES(tls_flow, src_port));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport,
+ MLX5_FLD_SZ_BYTES(tls_flow, dst_port));
+}
+
+static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps)
+{
+ switch (sk->sk_family) {
+ case AF_INET:
+ mlx5e_tls_set_ipv4_flow(flow, sk);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (!sk->sk_ipv6only &&
+ ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
+ mlx5e_tls_set_ipv4_flow(flow, sk);
+ break;
+ }
+ if (!(caps & MLX5_ACCEL_TLS_IPV6))
+ goto error_out;
+
+ mlx5e_tls_set_ipv6_flow(flow, sk);
+ break;
+#endif
+ default:
+ goto error_out;
+ }
+
+ mlx5e_tls_set_flow_tcp_ports(flow, sk);
+ return 0;
+error_out:
+ return -EINVAL;
+}
+
+static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 caps = mlx5_accel_tls_device_caps(mdev);
+ int ret = -ENOMEM;
+ void *flow;
+ u32 swid;
+
+ flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
+ if (!flow)
+ return ret;
+
+ ret = mlx5e_tls_set_flow(flow, sk, caps);
+ if (ret)
+ goto free_flow;
+
+ ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info,
+ start_offload_tcp_sn, &swid,
+ direction == TLS_OFFLOAD_CTX_DIR_TX);
+ if (ret < 0)
+ goto free_flow;
+
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+ struct mlx5e_tls_offload_context_tx *tx_ctx =
+ mlx5e_get_tls_tx_context(tls_ctx);
+
+ tx_ctx->swid = htonl(swid);
+ tx_ctx->expected_seq = start_offload_tcp_sn;
+ } else {
+ struct mlx5e_tls_offload_context_rx *rx_ctx =
+ mlx5e_get_tls_rx_context(tls_ctx);
+
+ rx_ctx->handle = htonl(swid);
+ }
+
+ return 0;
+free_flow:
+ kfree(flow);
+ return ret;
+}
+
+static void mlx5e_tls_del(struct net_device *netdev,
+ struct tls_context *tls_ctx,
+ enum tls_offload_ctx_dir direction)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ unsigned int handle;
+
+ handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ?
+ mlx5e_get_tls_tx_context(tls_ctx)->swid :
+ mlx5e_get_tls_rx_context(tls_ctx)->handle);
+
+ mlx5_accel_tls_del_flow(priv->mdev, handle,
+ direction == TLS_OFFLOAD_CTX_DIR_TX);
+}
+
+static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk,
+ u32 seq, u64 rcd_sn)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_tls_offload_context_rx *rx_ctx;
+
+ rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
+
+ netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq,
+ be64_to_cpu(rcd_sn));
+ mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
+}
+
+static const struct tlsdev_ops mlx5e_tls_ops = {
+ .tls_dev_add = mlx5e_tls_add,
+ .tls_dev_del = mlx5e_tls_del,
+ .tls_dev_resync_rx = mlx5e_tls_resync_rx,
+};
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ u32 caps;
+
+ if (!mlx5_accel_is_tls_device(priv->mdev))
+ return;
+
+ caps = mlx5_accel_tls_device_caps(priv->mdev);
+ if (caps & MLX5_ACCEL_TLS_TX) {
+ netdev->features |= NETIF_F_HW_TLS_TX;
+ netdev->hw_features |= NETIF_F_HW_TLS_TX;
+ }
+
+ if (caps & MLX5_ACCEL_TLS_RX) {
+ netdev->features |= NETIF_F_HW_TLS_RX;
+ netdev->hw_features |= NETIF_F_HW_TLS_RX;
+ }
+
+ if (!(caps & MLX5_ACCEL_TLS_LRO)) {
+ netdev->features &= ~NETIF_F_LRO;
+ netdev->hw_features &= ~NETIF_F_LRO;
+ }
+
+ netdev->tlsdev_ops = &mlx5e_tls_ops;
+}
+
+int mlx5e_tls_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+
+ if (!tls)
+ return -ENOMEM;
+
+ priv->tls = tls;
+ return 0;
+}
+
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls = priv->tls;
+
+ if (!tls)
+ return;
+
+ kfree(tls);
+ priv->tls = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
new file mode 100644
index 000000000..3f5d72163
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef __MLX5E_TLS_H__
+#define __MLX5E_TLS_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <net/tls.h>
+#include "en.h"
+
+struct mlx5e_tls_sw_stats {
+ atomic64_t tx_tls_drop_metadata;
+ atomic64_t tx_tls_drop_resync_alloc;
+ atomic64_t tx_tls_drop_no_sync_data;
+ atomic64_t tx_tls_drop_bypass_required;
+ atomic64_t rx_tls_drop_resync_request;
+ atomic64_t rx_tls_resync_request;
+ atomic64_t rx_tls_resync_reply;
+ atomic64_t rx_tls_auth_fail;
+};
+
+struct mlx5e_tls {
+ struct mlx5e_tls_sw_stats sw_stats;
+};
+
+struct mlx5e_tls_offload_context_tx {
+ struct tls_offload_context_tx base;
+ u32 expected_seq;
+ __be32 swid;
+};
+
+static inline struct mlx5e_tls_offload_context_tx *
+mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) >
+ TLS_OFFLOAD_CONTEXT_SIZE_TX);
+ return container_of(tls_offload_ctx_tx(tls_ctx),
+ struct mlx5e_tls_offload_context_tx,
+ base);
+}
+
+struct mlx5e_tls_offload_context_rx {
+ struct tls_offload_context_rx base;
+ __be32 handle;
+};
+
+static inline struct mlx5e_tls_offload_context_rx *
+mlx5e_get_tls_rx_context(struct tls_context *tls_ctx)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) >
+ TLS_OFFLOAD_CONTEXT_SIZE_RX);
+ return container_of(tls_offload_ctx_rx(tls_ctx),
+ struct mlx5e_tls_offload_context_rx,
+ base);
+}
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_tls_init(struct mlx5e_priv *priv);
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv);
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
+
+static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
+static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
+
+#endif
+
+#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
new file mode 100644
index 000000000..be137d4a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "en_accel/tls.h"
+#include "en_accel/tls_rxtx.h"
+#include "accel/accel.h"
+
+#include <net/inet6_hashtables.h>
+#include <linux/ipv6.h>
+
+#define SYNDROM_DECRYPTED 0x30
+#define SYNDROM_RESYNC_REQUEST 0x31
+#define SYNDROM_AUTH_FAILED 0x32
+
+#define SYNDROME_OFFLOAD_REQUIRED 32
+#define SYNDROME_SYNC 33
+
+struct sync_info {
+ u64 rcd_sn;
+ s32 sync_len;
+ int nr_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct recv_metadata_content {
+ u8 syndrome;
+ u8 reserved;
+ __be32 sync_seq;
+} __packed;
+
+struct send_metadata_content {
+ /* One byte of syndrome followed by 3 bytes of swid */
+ __be32 syndrome_swid;
+ __be16 first_seq;
+} __packed;
+
+struct mlx5e_tls_metadata {
+ union {
+ /* from fpga to host */
+ struct recv_metadata_content recv;
+ /* from host to fpga */
+ struct send_metadata_content send;
+ unsigned char raw[6];
+ } __packed content;
+ /* packet type ID field */
+ __be16 ethertype;
+} __packed;
+
+static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
+{
+ struct mlx5e_tls_metadata *pet;
+ struct ethhdr *eth;
+
+ if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata)))
+ return -ENOMEM;
+
+ eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata));
+ skb->mac_header -= sizeof(struct mlx5e_tls_metadata);
+ pet = (struct mlx5e_tls_metadata *)(eth + 1);
+
+ memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata),
+ 2 * ETH_ALEN);
+
+ eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+ pet->content.send.syndrome_swid =
+ htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
+
+ return 0;
+}
+
+static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context,
+ u32 tcp_seq, struct sync_info *info)
+{
+ int remaining, i = 0, ret = -EINVAL;
+ struct tls_record_info *record;
+ unsigned long flags;
+ s32 sync_size;
+
+ spin_lock_irqsave(&context->base.lock, flags);
+ record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn);
+
+ if (unlikely(!record))
+ goto out;
+
+ sync_size = tcp_seq - tls_record_start_seq(record);
+ info->sync_len = sync_size;
+ if (unlikely(sync_size < 0)) {
+ if (tls_record_is_start_marker(record))
+ goto done;
+
+ goto out;
+ }
+
+ remaining = sync_size;
+ while (remaining > 0) {
+ info->frags[i] = record->frags[i];
+ __skb_frag_ref(&info->frags[i]);
+ remaining -= skb_frag_size(&info->frags[i]);
+
+ if (remaining < 0)
+ skb_frag_size_add(&info->frags[i], remaining);
+
+ i++;
+ }
+ info->nr_frags = i;
+done:
+ ret = 0;
+out:
+ spin_unlock_irqrestore(&context->base.lock, flags);
+ return ret;
+}
+
+static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
+ struct sk_buff *nskb, u32 tcp_seq,
+ int headln, __be64 rcd_sn)
+{
+ struct mlx5e_tls_metadata *pet;
+ u8 syndrome = SYNDROME_SYNC;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ int data_len, mss;
+
+ nskb->dev = skb->dev;
+ skb_reset_mac_header(nskb);
+ skb_set_network_header(nskb, skb_network_offset(skb));
+ skb_set_transport_header(nskb, skb_transport_offset(skb));
+ memcpy(nskb->data, skb->data, headln);
+ memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn));
+
+ iph = ip_hdr(nskb);
+ iph->tot_len = htons(nskb->len - skb_network_offset(nskb));
+ th = tcp_hdr(nskb);
+ data_len = nskb->len - headln;
+ tcp_seq -= data_len;
+ th->seq = htonl(tcp_seq);
+
+ mss = nskb->dev->mtu - (headln - skb_network_offset(nskb));
+ skb_shinfo(nskb)->gso_size = 0;
+ if (data_len > mss) {
+ skb_shinfo(nskb)->gso_size = mss;
+ skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss);
+ }
+ skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+
+ pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
+ memcpy(pet, &syndrome, sizeof(syndrome));
+ pet->content.send.first_seq = htons(tcp_seq);
+
+ /* MLX5 devices don't care about the checksum partial start, offset
+ * and pseudo header
+ */
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+
+ nskb->xmit_more = 1;
+ nskb->queue_mapping = skb->queue_mapping;
+}
+
+static struct sk_buff *
+mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
+ struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi,
+ struct mlx5e_tls *tls)
+{
+ u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ struct sync_info info;
+ struct sk_buff *nskb;
+ int linear_len = 0;
+ int headln;
+ int i;
+
+ sq->stats->tls_ooo++;
+
+ if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
+ /* We might get here if a retransmission reaches the driver
+ * after the relevant record is acked.
+ * It should be safe to drop the packet in this case
+ */
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data);
+ goto err_out;
+ }
+
+ if (unlikely(info.sync_len < 0)) {
+ u32 payload;
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ payload = skb->len - headln;
+ if (likely(payload <= -info.sync_len))
+ /* SKB payload doesn't require offload
+ */
+ return skb;
+
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
+ goto err_out;
+ }
+
+ if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata);
+ goto err_out;
+ }
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ linear_len += headln + sizeof(info.rcd_sn);
+ nskb = alloc_skb(linear_len, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc);
+ goto err_out;
+ }
+
+ context->expected_seq = tcp_seq + skb->len - headln;
+ skb_put(nskb, linear_len);
+ for (i = 0; i < info.nr_frags; i++)
+ skb_shinfo(nskb)->frags[i] = info.frags[i];
+
+ skb_shinfo(nskb)->nr_frags = info.nr_frags;
+ nskb->data_len = info.sync_len;
+ nskb->len += info.sync_len;
+ sq->stats->tls_resync_bytes += nskb->len;
+ mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
+ cpu_to_be64(info.rcd_sn));
+ mlx5e_sq_xmit(sq, nskb, *wqe, *pi);
+ mlx5e_sq_fetch_wqe(sq, wqe, pi);
+ return skb;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_tls_offload_context_tx *context;
+ struct tls_context *tls_ctx;
+ u32 expected_seq;
+ int datalen;
+ u32 skb_seq;
+
+ if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+ goto out;
+
+ datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (!datalen)
+ goto out;
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ if (unlikely(tls_ctx->netdev != netdev))
+ goto out;
+
+ skb_seq = ntohl(tcp_hdr(skb)->seq);
+ context = mlx5e_get_tls_tx_context(tls_ctx);
+ expected_seq = context->expected_seq;
+
+ if (unlikely(expected_seq != skb_seq)) {
+ skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
+ goto out;
+ }
+
+ if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+ atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ goto out;
+ }
+
+ context->expected_seq = skb_seq + datalen;
+out:
+ return skb;
+}
+
+static int tls_update_resync_sn(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_tls_metadata *mdata)
+{
+ struct sock *sk = NULL;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ __be32 seq;
+
+ if (mdata->ethertype != htons(ETH_P_IP))
+ return -EINVAL;
+
+ iph = (struct iphdr *)(mdata + 1);
+
+ th = ((void *)iph) + iph->ihl * 4;
+
+ if (iph->version == 4) {
+ sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ iph->saddr, th->source, iph->daddr,
+ th->dest, netdev->ifindex);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
+
+ sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ &ipv6h->saddr, th->source,
+ &ipv6h->daddr, ntohs(th->dest),
+ netdev->ifindex, 0);
+#endif
+ }
+ if (!sk || sk->sk_state == TCP_TIME_WAIT) {
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request);
+ goto out;
+ }
+
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+
+ memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq));
+ tls_offload_rx_resync_request(sk, seq);
+out:
+ return 0;
+}
+
+void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+ u32 *cqe_bcnt)
+{
+ struct mlx5e_tls_metadata *mdata;
+ struct mlx5e_priv *priv;
+
+ if (!is_metadata_hdr_valid(skb))
+ return;
+
+ /* Use the metadata */
+ mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN);
+ switch (mdata->content.recv.syndrome) {
+ case SYNDROM_DECRYPTED:
+ skb->decrypted = 1;
+ break;
+ case SYNDROM_RESYNC_REQUEST:
+ tls_update_resync_sn(netdev, skb, mdata);
+ priv = netdev_priv(netdev);
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request);
+ break;
+ case SYNDROM_AUTH_FAILED:
+ /* Authentication failure will be observed and verified by kTLS */
+ priv = netdev_priv(netdev);
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail);
+ break;
+ default:
+ /* Bypass the metadata header to others */
+ return;
+ }
+
+ remove_metadata_hdr(skb);
+ *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
new file mode 100644
index 000000000..311667ec7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_TLS_RXTX_H__
+#define __MLX5E_TLS_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <linux/skbuff.h>
+#include "en.h"
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi);
+
+void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+ u32 *cqe_bcnt);
+
+#endif /* CONFIG_MLX5_EN_TLS */
+
+#endif /* __MLX5E_TLS_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
new file mode 100644
index 000000000..01468ec27
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/tls.h"
+#include "fpga/sdk.h"
+#include "en_accel/tls.h"
+
+static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc)
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+{
+ if (!priv->tls)
+ return 0;
+
+ return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ unsigned int i, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_tls_sw_stats_desc[i].format);
+
+ return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ int i, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+ mlx5e_tls_sw_stats_desc, i);
+
+ return NUM_TLS_SW_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
new file mode 100644
index 000000000..a4be04deb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -0,0 +1,710 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include <linux/mlx5/fs.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "en.h"
+
+struct arfs_tuple {
+ __be16 etype;
+ u8 ip_proto;
+ union {
+ __be32 src_ipv4;
+ struct in6_addr src_ipv6;
+ };
+ union {
+ __be32 dst_ipv4;
+ struct in6_addr dst_ipv6;
+ };
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+struct arfs_rule {
+ struct mlx5e_priv *priv;
+ struct work_struct arfs_work;
+ struct mlx5_flow_handle *rule;
+ struct hlist_node hlist;
+ int rxq;
+ /* Flow ID passed to ndo_rx_flow_steer */
+ int flow_id;
+ /* Filter ID returned by ndo_rx_flow_steer */
+ int filter_id;
+ struct arfs_tuple tuple;
+};
+
+#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
+ for (i = 0; i < ARFS_NUM_TYPES; i++) \
+ mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)
+
+#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
+ for (j = 0; j < ARFS_HASH_SIZE; j++) \
+ hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
+
+static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+{
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ return MLX5E_TT_IPV4_TCP;
+ case ARFS_IPV4_UDP:
+ return MLX5E_TT_IPV4_UDP;
+ case ARFS_IPV6_TCP:
+ return MLX5E_TT_IPV6_TCP;
+ case ARFS_IPV6_UDP:
+ return MLX5E_TT_IPV6_UDP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int arfs_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_tir *tir = priv->indir_tir;
+ int err = 0;
+ int tt;
+ int i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ dest.tir_num = tir[i].tirn;
+ tt = arfs_get_tt(i);
+ /* Modify ttc rules destination to bypass the aRFS tables*/
+ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
+ &dest, NULL);
+ if (err) {
+ netdev_err(priv->netdev,
+ "%s: modify ttc destination failed\n",
+ __func__);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void arfs_del_rules(struct mlx5e_priv *priv);
+
+int mlx5e_arfs_disable(struct mlx5e_priv *priv)
+{
+ arfs_del_rules(priv);
+
+ return arfs_disable(priv);
+}
+
+int mlx5e_arfs_enable(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_destination dest = {};
+ int err = 0;
+ int tt;
+ int i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ dest.ft = priv->fs.arfs.arfs_tables[i].ft.t;
+ tt = arfs_get_tt(i);
+ /* Modify ttc rules destination to point on the aRFS FTs */
+ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
+ &dest, NULL);
+ if (err) {
+ netdev_err(priv->netdev,
+ "%s: modify ttc destination failed err=%d\n",
+ __func__, err);
+ arfs_disable(priv);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void arfs_destroy_table(struct arfs_table *arfs_t)
+{
+ mlx5_del_flow_rules(arfs_t->default_rule);
+ mlx5e_destroy_flow_table(&arfs_t->ft);
+}
+
+void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
+{
+ int i;
+
+ if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ return;
+
+ arfs_del_rules(priv);
+ destroy_workqueue(priv->fs.arfs.wq);
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
+ arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
+ }
+}
+
+static int arfs_add_default_rule(struct mlx5e_priv *priv,
+ enum arfs_type type)
+{
+ struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
+ struct mlx5e_tir *tir = priv->indir_tir;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ enum mlx5e_traffic_types tt;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ tt = arfs_get_tt(type);
+ if (tt == -EINVAL) {
+ netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
+ __func__, type);
+ err = -EINVAL;
+ goto out;
+ }
+
+ dest.tir_num = tir[tt].tirn;
+
+ arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
+ &flow_act,
+ &dest, 1);
+ if (IS_ERR(arfs_t->default_rule)) {
+ err = PTR_ERR(arfs_t->default_rule);
+ arfs_t->default_rule = NULL;
+ netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
+ __func__, type);
+ }
+out:
+ kvfree(spec);
+ return err;
+}
+
+#define MLX5E_ARFS_NUM_GROUPS 2
+#define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1)
+#define MLX5E_ARFS_GROUP2_SIZE BIT(0)
+#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\
+ MLX5E_ARFS_GROUP2_SIZE)
+static int arfs_create_groups(struct mlx5e_flow_table *ft,
+ enum arfs_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
+ sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kvfree(ft->g);
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
+ outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ case ARFS_IPV6_TCP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
+ break;
+ case ARFS_IPV4_UDP:
+ case ARFS_IPV6_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ case ARFS_IPV4_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ break;
+ case ARFS_IPV6_TCP:
+ case ARFS_IPV6_UDP:
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ARFS_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ARFS_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+out:
+ kvfree(in);
+
+ return err;
+}
+
+static int arfs_create_table(struct mlx5e_priv *priv,
+ enum arfs_type type)
+{
+ struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE;
+ ft_attr.level = MLX5E_ARFS_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = arfs_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = arfs_add_default_rule(priv, type);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
+{
+ int err = 0;
+ int i;
+
+ if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ return 0;
+
+ spin_lock_init(&priv->fs.arfs.arfs_lock);
+ INIT_LIST_HEAD(&priv->fs.arfs.rules);
+ priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
+ if (!priv->fs.arfs.wq)
+ return -ENOMEM;
+
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ err = arfs_create_table(priv, i);
+ if (err)
+ goto err;
+ }
+ return 0;
+err:
+ mlx5e_arfs_destroy_tables(priv);
+ return err;
+}
+
+#define MLX5E_ARFS_EXPIRY_QUOTA 60
+
+static void arfs_may_expire_flow(struct mlx5e_priv *priv)
+{
+ struct arfs_rule *arfs_rule;
+ struct hlist_node *htmp;
+ int quota = 0;
+ int i;
+ int j;
+
+ HLIST_HEAD(del_list);
+ spin_lock_bh(&priv->fs.arfs.arfs_lock);
+ mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+ if (!work_pending(&arfs_rule->arfs_work) &&
+ rps_may_expire_flow(priv->netdev,
+ arfs_rule->rxq, arfs_rule->flow_id,
+ arfs_rule->filter_id)) {
+ hlist_del_init(&arfs_rule->hlist);
+ hlist_add_head(&arfs_rule->hlist, &del_list);
+ if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
+ break;
+ }
+ }
+ spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+ hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
+ if (arfs_rule->rule)
+ mlx5_del_flow_rules(arfs_rule->rule);
+ hlist_del(&arfs_rule->hlist);
+ kfree(arfs_rule);
+ }
+}
+
+static void arfs_del_rules(struct mlx5e_priv *priv)
+{
+ struct hlist_node *htmp;
+ struct arfs_rule *rule;
+ int i;
+ int j;
+
+ HLIST_HEAD(del_list);
+ spin_lock_bh(&priv->fs.arfs.arfs_lock);
+ mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+ hlist_del_init(&rule->hlist);
+ hlist_add_head(&rule->hlist, &del_list);
+ }
+ spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+
+ hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
+ cancel_work_sync(&rule->arfs_work);
+ if (rule->rule)
+ mlx5_del_flow_rules(rule->rule);
+ hlist_del(&rule->hlist);
+ kfree(rule);
+ }
+}
+
+static struct hlist_head *
+arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
+ __be16 dst_port)
+{
+ unsigned long l;
+ int bucket_idx;
+
+ l = (__force unsigned long)src_port |
+ ((__force unsigned long)dst_port << 2);
+
+ bucket_idx = hash_long(l, ARFS_HASH_SHIFT);
+
+ return &arfs_t->rules_hash[bucket_idx];
+}
+
+static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
+ u8 ip_proto, __be16 etype)
+{
+ if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
+ return &arfs->arfs_tables[ARFS_IPV4_TCP];
+ if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
+ return &arfs->arfs_tables[ARFS_IPV4_UDP];
+ if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
+ return &arfs->arfs_tables[ARFS_IPV6_TCP];
+ if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
+ return &arfs->arfs_tables[ARFS_IPV6_UDP];
+
+ return NULL;
+}
+
+static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
+ struct arfs_rule *arfs_rule)
+{
+ struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct arfs_tuple *tuple = &arfs_rule->tuple;
+ struct mlx5_flow_handle *rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct arfs_table *arfs_table;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
+ ntohs(tuple->etype));
+ arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
+ if (!arfs_table) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ ft = arfs_table->ft.t;
+ if (tuple->ip_proto == IPPROTO_TCP) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
+ ntohs(tuple->dst_port));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
+ ntohs(tuple->src_port));
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.udp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.udp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
+ ntohs(tuple->dst_port));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
+ ntohs(tuple->src_port));
+ }
+ if (tuple->etype == htons(ETH_P_IP)) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &tuple->src_ipv4,
+ 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &tuple->dst_ipv4,
+ 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &tuple->src_ipv6,
+ 16);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &tuple->dst_ipv6,
+ 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff,
+ 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff,
+ 16);
+ }
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
+ __func__, arfs_rule->filter_id, arfs_rule->rxq, err);
+ }
+
+out:
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule, u16 rxq)
+{
+ struct mlx5_flow_destination dst = {};
+ int err = 0;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst.tir_num = priv->direct_tir[rxq].tirn;
+ err = mlx5_modify_rule_destination(rule, &dst, NULL);
+ if (err)
+ netdev_warn(priv->netdev,
+ "Failed to modify aRFS rule destination to rq=%d\n", rxq);
+}
+
+static void arfs_handle_work(struct work_struct *work)
+{
+ struct arfs_rule *arfs_rule = container_of(work,
+ struct arfs_rule,
+ arfs_work);
+ struct mlx5e_priv *priv = arfs_rule->priv;
+ struct mlx5_flow_handle *rule;
+
+ mutex_lock(&priv->state_lock);
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ spin_lock_bh(&priv->fs.arfs.arfs_lock);
+ hlist_del(&arfs_rule->hlist);
+ spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+
+ mutex_unlock(&priv->state_lock);
+ kfree(arfs_rule);
+ goto out;
+ }
+ mutex_unlock(&priv->state_lock);
+
+ if (!arfs_rule->rule) {
+ rule = arfs_add_rule(priv, arfs_rule);
+ if (IS_ERR(rule))
+ goto out;
+ arfs_rule->rule = rule;
+ } else {
+ arfs_modify_rule_rq(priv, arfs_rule->rule,
+ arfs_rule->rxq);
+ }
+out:
+ arfs_may_expire_flow(priv);
+}
+
+static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
+ struct arfs_table *arfs_t,
+ const struct flow_keys *fk,
+ u16 rxq, u32 flow_id)
+{
+ struct arfs_rule *rule;
+ struct arfs_tuple *tuple;
+
+ rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
+ if (!rule)
+ return NULL;
+
+ rule->priv = priv;
+ rule->rxq = rxq;
+ INIT_WORK(&rule->arfs_work, arfs_handle_work);
+
+ tuple = &rule->tuple;
+ tuple->etype = fk->basic.n_proto;
+ tuple->ip_proto = fk->basic.ip_proto;
+ if (tuple->etype == htons(ETH_P_IP)) {
+ tuple->src_ipv4 = fk->addrs.v4addrs.src;
+ tuple->dst_ipv4 = fk->addrs.v4addrs.dst;
+ } else {
+ memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr));
+ memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ }
+ tuple->src_port = fk->ports.src;
+ tuple->dst_port = fk->ports.dst;
+
+ rule->flow_id = flow_id;
+ rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
+
+ hlist_add_head(&rule->hlist,
+ arfs_hash_bucket(arfs_t, tuple->src_port,
+ tuple->dst_port));
+ return rule;
+}
+
+static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk)
+{
+ if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst)
+ return false;
+ if (tuple->etype != fk->basic.n_proto)
+ return false;
+ if (tuple->etype == htons(ETH_P_IP))
+ return tuple->src_ipv4 == fk->addrs.v4addrs.src &&
+ tuple->dst_ipv4 == fk->addrs.v4addrs.dst;
+ if (tuple->etype == htons(ETH_P_IPV6))
+ return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr)) &&
+ !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ return false;
+}
+
+static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
+ const struct flow_keys *fk)
+{
+ struct arfs_rule *arfs_rule;
+ struct hlist_head *head;
+
+ head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst);
+ hlist_for_each_entry(arfs_rule, head, hlist) {
+ if (arfs_cmp(&arfs_rule->tuple, fk))
+ return arfs_rule;
+ }
+
+ return NULL;
+}
+
+int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct arfs_table *arfs_t;
+ struct arfs_rule *arfs_rule;
+ struct flow_keys fk;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+ return -EPROTONOSUPPORT;
+
+ if (fk.basic.n_proto != htons(ETH_P_IP) &&
+ fk.basic.n_proto != htons(ETH_P_IPV6))
+ return -EPROTONOSUPPORT;
+
+ if (skb->encapsulation)
+ return -EPROTONOSUPPORT;
+
+ arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto);
+ if (!arfs_t)
+ return -EPROTONOSUPPORT;
+
+ spin_lock_bh(&arfs->arfs_lock);
+ arfs_rule = arfs_find_rule(arfs_t, &fk);
+ if (arfs_rule) {
+ if (arfs_rule->rxq == rxq_index) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return arfs_rule->filter_id;
+ }
+ arfs_rule->rxq = rxq_index;
+ } else {
+ arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id);
+ if (!arfs_rule) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return -ENOMEM;
+ }
+ }
+ queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
+ spin_unlock_bh(&arfs->arfs_lock);
+ return arfs_rule->filter_id;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
new file mode 100644
index 000000000..124e4567a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+
+/* mlx5e global resources should be placed in this file.
+ * Global resources are common to all the netdevices crated on the same nic.
+ */
+
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir, u32 *in, int inlen)
+{
+ int err;
+
+ err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn);
+ if (err)
+ return err;
+
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
+ list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+
+ return 0;
+}
+
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir)
+{
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
+ mlx5_core_destroy_tir(mdev, tir->tirn);
+ list_del(&tir->list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+}
+
+static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ struct mlx5_core_mkey *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_resources *res = &mdev->mlx5e_res;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &res->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "alloc pd failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn);
+ if (err) {
+ mlx5_core_err(mdev, "alloc td failed, %d\n", err);
+ goto err_dealloc_pd;
+ }
+
+ err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey);
+ if (err) {
+ mlx5_core_err(mdev, "create mkey failed, %d\n", err);
+ goto err_dealloc_transport_domain;
+ }
+
+ err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false);
+ if (err) {
+ mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err);
+ goto err_destroy_mkey;
+ }
+
+ INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
+ mutex_init(&mdev->mlx5e_res.td.list_lock);
+
+ return 0;
+
+err_destroy_mkey:
+ mlx5_core_destroy_mkey(mdev, &res->mkey);
+err_dealloc_transport_domain:
+ mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(mdev, res->pdn);
+ return err;
+}
+
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_resources *res = &mdev->mlx5e_res;
+
+ mlx5_free_bfreg(mdev, &res->bfreg);
+ mlx5_core_destroy_mkey(mdev, &res->mkey);
+ mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+ mlx5_core_dealloc_pd(mdev, res->pdn);
+ memset(res, 0, sizeof(*res));
+}
+
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_tir *tir;
+ int err = 0;
+ u32 tirn = 0;
+ int inlen;
+ void *in;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (enable_uc_lb)
+ MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
+ MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
+ list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
+ tirn = tir->tirn;
+ err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+ if (err)
+ goto out;
+ }
+
+out:
+ kvfree(in);
+ if (err)
+ netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+
+ return err;
+}
+
+u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
+{
+ u8 min_inline_mode;
+
+ mlx5_query_min_inline(mdev, &min_inline_mode);
+ if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
+ !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ min_inline_mode = MLX5_INLINE_MODE_L2;
+
+ return min_inline_mode;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
new file mode 100644
index 000000000..722998d68
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -0,0 +1,1206 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include "en.h"
+#include "en/port.h"
+#include "en/port_buffer.h"
+
+#define MLX5E_100MB (100000)
+#define MLX5E_1GB (1000000)
+
+#define MLX5E_CEE_STATE_UP 1
+#define MLX5E_CEE_STATE_DOWN 0
+
+/* Max supported cable length is 1000 meters */
+#define MLX5E_MAX_CABLE_LENGTH 1000
+
+enum {
+ MLX5E_VENDOR_TC_GROUP_NUM = 7,
+ MLX5E_LOWEST_PRIO_GROUP = 0,
+};
+
+#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \
+ MLX5_CAP_QCAM_REG(mdev, qpts) && \
+ MLX5_CAP_QCAM_REG(mdev, qpdpm))
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state);
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio);
+
+/* If dcbx mode is non-host set the dcbx mode to host.
+ */
+static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
+ enum mlx5_dcbx_oper_mode mode)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 param[MLX5_ST_SZ_DW(dcbx_param)];
+ int err;
+
+ err = mlx5_query_port_dcbx_param(mdev, param);
+ if (err)
+ return err;
+
+ MLX5_SET(dcbx_param, param, version_admin, mode);
+ if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ MLX5_SET(dcbx_param, param, willing_admin, 1);
+
+ return mlx5_set_port_dcbx_param(mdev, param);
+}
+
+static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv)
+{
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, dcbx))
+ return 0;
+
+ if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ return 0;
+
+ err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST);
+ if (err)
+ return err;
+
+ dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
+ struct ieee_ets *ets)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 tc_group[IEEE_8021QAZ_MAX_TCS];
+ bool is_tc_group_6_exist = false;
+ bool is_zero_bw_ets_tc = false;
+ int err = 0;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return -EOPNOTSUPP;
+
+ ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
+ for (i = 0; i < ets->ets_cap; i++) {
+ err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
+ if (err)
+ return err;
+
+ err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]);
+ if (err)
+ return err;
+
+ err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]);
+ if (err)
+ return err;
+
+ if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC &&
+ tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1))
+ is_zero_bw_ets_tc = true;
+
+ if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1))
+ is_tc_group_6_exist = true;
+ }
+
+ /* Report 0% ets tc if exits*/
+ if (is_zero_bw_ets_tc) {
+ for (i = 0; i < ets->ets_cap; i++)
+ if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP)
+ ets->tc_tx_bw[i] = 0;
+ }
+
+ /* Update tc_tsa based on fw setting*/
+ for (i = 0; i < ets->ets_cap; i++) {
+ if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC)
+ priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM &&
+ !is_tc_group_6_exist)
+ priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+ }
+ memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa));
+
+ return err;
+}
+
+static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc)
+{
+ bool any_tc_mapped_to_ets = false;
+ bool ets_zero_bw = false;
+ int strict_group;
+ int i;
+
+ for (i = 0; i <= max_tc; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+ any_tc_mapped_to_ets = true;
+ if (!ets->tc_tx_bw[i])
+ ets_zero_bw = true;
+ }
+ }
+
+ /* strict group has higher priority than ets group */
+ strict_group = MLX5E_LOWEST_PRIO_GROUP;
+ if (any_tc_mapped_to_ets)
+ strict_group++;
+ if (ets_zero_bw)
+ strict_group++;
+
+ for (i = 0; i <= max_tc; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ tc_group[i] = MLX5E_VENDOR_TC_GROUP_NUM;
+ break;
+ case IEEE_8021QAZ_TSA_STRICT:
+ tc_group[i] = strict_group++;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ tc_group[i] = MLX5E_LOWEST_PRIO_GROUP;
+ if (ets->tc_tx_bw[i] && ets_zero_bw)
+ tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1;
+ break;
+ }
+ }
+}
+
+static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
+ u8 *tc_group, int max_tc)
+{
+ int bw_for_ets_zero_bw_tc = 0;
+ int last_ets_zero_bw_tc = -1;
+ int num_ets_zero_bw = 0;
+ int i;
+
+ for (i = 0; i <= max_tc; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS &&
+ !ets->tc_tx_bw[i]) {
+ num_ets_zero_bw++;
+ last_ets_zero_bw_tc = i;
+ }
+ }
+
+ if (num_ets_zero_bw)
+ bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw;
+
+ for (i = 0; i <= max_tc; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ break;
+ case IEEE_8021QAZ_TSA_STRICT:
+ tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ tc_tx_bw[i] = ets->tc_tx_bw[i] ?
+ ets->tc_tx_bw[i] :
+ bw_for_ets_zero_bw_tc;
+ break;
+ }
+ }
+
+ /* Make sure the total bw for ets zero bw group is 100% */
+ if (last_ets_zero_bw_tc != -1)
+ tc_tx_bw[last_ets_zero_bw_tc] +=
+ MLX5E_MAX_BW_ALLOC % num_ets_zero_bw;
+}
+
+/* If there are ETS BW 0,
+ * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%.
+ * Set group #0 to all the ETS BW 0 tcs and
+ * equally splits the 100% BW between them
+ * Report both group #0 and #1 as ETS type.
+ * All the tcs in group #0 will be reported with 0% BW.
+ */
+int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
+ u8 tc_group[IEEE_8021QAZ_MAX_TCS];
+ int max_tc = mlx5_max_tc(mdev);
+ int err, i;
+
+ mlx5e_build_tc_group(ets, tc_group, max_tc);
+ mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc);
+
+ err = mlx5_set_port_prio_tc(mdev, ets->prio_tc);
+ if (err)
+ return err;
+
+ err = mlx5_set_port_tc_group(mdev, tc_group);
+ if (err)
+ return err;
+
+ err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+
+ if (err)
+ return err;
+
+ memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n",
+ __func__, i, ets->prio_tc[i]);
+ mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n",
+ __func__, i, tc_tx_bw[i], tc_group[i]);
+ }
+
+ return err;
+}
+
+static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
+ struct ieee_ets *ets,
+ bool zero_sum_allowed)
+{
+ bool have_ets_tc = false;
+ int bw_sum = 0;
+ int i;
+
+ /* Validate Priority */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) {
+ netdev_err(netdev,
+ "Failed to validate ETS: priority value greater than max(%d)\n",
+ MLX5E_MAX_PRIORITY);
+ return -EINVAL;
+ }
+ }
+
+ /* Validate Bandwidth Sum */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+ have_ets_tc = true;
+ bw_sum += ets->tc_tx_bw[i];
+ }
+ }
+
+ if (have_ets_tc && bw_sum != 100) {
+ if (bw_sum || (!bw_sum && !zero_sum_allowed))
+ netdev_err(netdev,
+ "Failed to validate ETS: BW sum is illegal\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
+ struct ieee_ets *ets)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return -EOPNOTSUPP;
+
+ err = mlx5e_dbcnl_validate_ets(netdev, ets, false);
+ if (err)
+ return err;
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, ets);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ int i;
+
+ pfc->pfc_cap = mlx5_max_tc(mdev) + 1;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause);
+ pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause);
+ }
+
+ if (MLX5_BUFFER_SUPPORTED(mdev))
+ pfc->delay = priv->dcbx.cable_len;
+
+ return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL);
+}
+
+static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 old_cable_len = priv->dcbx.cable_len;
+ struct ieee_pfc pfc_new;
+ u32 changed = 0;
+ u8 curr_pfc_en;
+ int ret = 0;
+
+ /* pfc_en */
+ mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL);
+ if (pfc->pfc_en != curr_pfc_en) {
+ ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
+ if (ret)
+ return ret;
+ mlx5_toggle_port_link(mdev);
+ changed |= MLX5E_PORT_BUFFER_PFC;
+ }
+
+ if (pfc->delay &&
+ pfc->delay < MLX5E_MAX_CABLE_LENGTH &&
+ pfc->delay != priv->dcbx.cable_len) {
+ priv->dcbx.cable_len = pfc->delay;
+ changed |= MLX5E_PORT_BUFFER_CABLE_LEN;
+ }
+
+ if (MLX5_BUFFER_SUPPORTED(mdev)) {
+ pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en;
+ if (priv->dcbx.manual_buffer)
+ ret = mlx5e_port_manual_buffer_config(priv, changed,
+ dev->mtu, &pfc_new,
+ NULL, NULL);
+
+ if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN))
+ priv->dcbx.cable_len = old_cable_len;
+ }
+
+ if (!ret) {
+ mlx5e_dbg(HW, priv,
+ "%s: PFC per priority bit mask: 0x%x\n",
+ __func__, pfc->pfc_en);
+ }
+ return ret;
+}
+
+static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return priv->dcbx.cap;
+}
+
+static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+ if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+ return 1;
+
+ if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
+ if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
+ return 0;
+
+ /* set dcbx to fw controlled */
+ if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) {
+ dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+ dcbx->cap &= ~DCB_CAP_DCBX_HOST;
+ return 0;
+ }
+
+ return 1;
+ }
+
+ if (!(mode & DCB_CAP_DCBX_HOST))
+ return 1;
+
+ if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+ return 1;
+
+ dcbx->cap = mode;
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct dcb_app temp;
+ bool is_new;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
+ !MLX5_DSCP_SUPPORTED(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
+ (app->protocol >= MLX5E_MAX_DSCP))
+ return -EINVAL;
+
+ /* Save the old entry info */
+ temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+ temp.protocol = app->protocol;
+ temp.priority = priv->dcbx_dp.dscp2prio[app->protocol];
+
+ /* Check if need to switch to dscp trust state */
+ if (!priv->dcbx.dscp_app_cnt) {
+ err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP);
+ if (err)
+ return err;
+ }
+
+ /* Skip the fw command if new and old mapping are the same */
+ if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) {
+ err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority);
+ if (err)
+ goto fw_err;
+ }
+
+ /* Delete the old entry if exists */
+ is_new = false;
+ err = dcb_ieee_delapp(dev, &temp);
+ if (err)
+ is_new = true;
+
+ /* Add new entry and update counter */
+ err = dcb_ieee_setapp(dev, app);
+ if (err)
+ return err;
+
+ if (is_new)
+ priv->dcbx.dscp_app_cnt++;
+
+ return err;
+
+fw_err:
+ mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+ return err;
+}
+
+static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
+ !MLX5_DSCP_SUPPORTED(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
+ (app->protocol >= MLX5E_MAX_DSCP))
+ return -EINVAL;
+
+ /* Skip if no dscp app entry */
+ if (!priv->dcbx.dscp_app_cnt)
+ return -ENOENT;
+
+ /* Check if the entry matches fw setting */
+ if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol])
+ return -ENOENT;
+
+ /* Delete the app entry */
+ err = dcb_ieee_delapp(dev, app);
+ if (err)
+ return err;
+
+ /* Reset the priority mapping back to zero */
+ err = mlx5e_set_dscp2prio(priv, app->protocol, 0);
+ if (err)
+ goto fw_err;
+
+ priv->dcbx.dscp_app_cnt--;
+
+ /* Check if need to switch to pcp trust state */
+ if (!priv->dcbx.dscp_app_cnt)
+ err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+
+ return err;
+
+fw_err:
+ mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+ return err;
+}
+
+static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ int err;
+ int i;
+
+ err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
+ if (err)
+ return err;
+
+ memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate));
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ switch (max_bw_unit[i]) {
+ case MLX5_100_MBPS_UNIT:
+ maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB;
+ break;
+ case MLX5_GBPS_UNIT:
+ maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB;
+ break;
+ case MLX5_BW_NO_LIMIT:
+ break;
+ default:
+ WARN(true, "non-supported BW unit");
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB);
+ int i;
+
+ memset(max_bw_value, 0, sizeof(max_bw_value));
+ memset(max_bw_unit, 0, sizeof(max_bw_unit));
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ if (!maxrate->tc_maxrate[i]) {
+ max_bw_unit[i] = MLX5_BW_NO_LIMIT;
+ continue;
+ }
+ if (maxrate->tc_maxrate[i] < upper_limit_mbps) {
+ max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
+ MLX5E_100MB);
+ max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1;
+ max_bw_unit[i] = MLX5_100_MBPS_UNIT;
+ } else {
+ max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
+ MLX5E_1GB);
+ max_bw_unit[i] = MLX5_GBPS_UNIT;
+ }
+ }
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n",
+ __func__, i, max_bw_value[i]);
+ }
+
+ return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
+}
+
+static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct ieee_ets ets;
+ struct ieee_pfc pfc;
+ int err = -EOPNOTSUPP;
+ int i;
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ goto out;
+
+ memset(&ets, 0, sizeof(ets));
+ memset(&pfc, 0, sizeof(pfc));
+
+ ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+ for (i = 0; i < CEE_DCBX_MAX_PGS; i++) {
+ ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i];
+ ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
+ ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i];
+ mlx5e_dbg(HW, priv,
+ "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n",
+ __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i],
+ ets.prio_tc[i]);
+ }
+
+ err = mlx5e_dbcnl_validate_ets(netdev, &ets, true);
+ if (err)
+ goto out;
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+ if (err) {
+ netdev_err(netdev,
+ "%s, Failed to set ETS: %d\n", __func__, err);
+ goto out;
+ }
+
+ /* Set PFC */
+ pfc.pfc_cap = mlx5_max_tc(mdev) + 1;
+ if (!cee_cfg->pfc_enable)
+ pfc.pfc_en = 0;
+ else
+ for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+ pfc.pfc_en |= cee_cfg->pfc_setting[i] << i;
+
+ err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc);
+ if (err) {
+ netdev_err(netdev,
+ "%s, Failed to set PFC: %d\n", __func__, err);
+ goto out;
+ }
+out:
+ return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET;
+}
+
+static u8 mlx5e_dcbnl_getstate(struct net_device *netdev)
+{
+ return MLX5E_CEE_STATE_UP;
+}
+
+static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
+ u8 *perm_addr)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (!perm_addr)
+ return;
+
+ memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+ mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+}
+
+static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
+ int priority, u8 prio_type,
+ u8 pgid, u8 bw_pct, u8 up_map)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ cee_cfg->prio_to_pg_map[priority] = pgid;
+}
+
+static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev,
+ int pgid, u8 bw_pct)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ cee_cfg->pg_bw_pct[pgid] = bw_pct;
+}
+
+static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
+ int priority, u8 *prio_type,
+ u8 *pgid, u8 *bw_pct, u8 *up_map)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets)) {
+ netdev_err(netdev, "%s, ets is not supported\n", __func__);
+ return;
+ }
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ *prio_type = 0;
+ *bw_pct = 0;
+ *up_map = 0;
+
+ if (mlx5_query_port_prio_tc(mdev, priority, pgid))
+ *pgid = 0;
+}
+
+static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
+ int pgid, u8 *bw_pct)
+{
+ struct ieee_ets ets;
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ mlx5e_dcbnl_ieee_getets(netdev, &ets);
+ *bw_pct = ets.tc_tx_bw[pgid];
+}
+
+static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev,
+ int priority, u8 setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (setting > 1)
+ return;
+
+ cee_cfg->pfc_setting[priority] = setting;
+}
+
+static int
+mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev,
+ int priority, u8 *setting)
+{
+ struct ieee_pfc pfc;
+ int err;
+
+ err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc);
+
+ if (err)
+ *setting = 0;
+ else
+ *setting = (pfc.pfc_en >> priority) & 0x01;
+
+ return err;
+}
+
+static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev,
+ int priority, u8 *setting)
+{
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (!setting)
+ return;
+
+ mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting);
+}
+
+static u8 mlx5e_dcbnl_getcap(struct net_device *netdev,
+ int capid, u8 *cap)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 rval = 0;
+
+ switch (capid) {
+ case DCB_CAP_ATTR_PG:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_PFC:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_UP2TC:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_PG_TCS:
+ *cap = 1 << mlx5_max_tc(mdev);
+ break;
+ case DCB_CAP_ATTR_PFC_TCS:
+ *cap = 1 << mlx5_max_tc(mdev);
+ break;
+ case DCB_CAP_ATTR_GSP:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_BCN:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_DCBX:
+ *cap = priv->dcbx.cap |
+ DCB_CAP_DCBX_VER_CEE |
+ DCB_CAP_DCBX_VER_IEEE;
+ break;
+ default:
+ *cap = 0;
+ rval = 1;
+ break;
+ }
+
+ return rval;
+}
+
+static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev,
+ int tcs_id, u8 *num)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ switch (tcs_id) {
+ case DCB_NUMTCS_ATTR_PG:
+ case DCB_NUMTCS_ATTR_PFC:
+ *num = mlx5_max_tc(mdev) + 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev)
+{
+ struct ieee_pfc pfc;
+
+ if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc))
+ return MLX5E_CEE_STATE_DOWN;
+
+ return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN;
+}
+
+static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN))
+ return;
+
+ cee_cfg->pfc_enable = state;
+}
+
+static int mlx5e_dcbnl_getbuffer(struct net_device *dev,
+ struct dcbnl_buffer *dcb_buffer)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_port_buffer port_buffer;
+ u8 buffer[MLX5E_MAX_PRIORITY];
+ int i, err;
+
+ if (!MLX5_BUFFER_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ err = mlx5e_port_query_priority2buffer(mdev, buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+ dcb_buffer->prio2buffer[i] = buffer[i];
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+ dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size;
+ dcb_buffer->total_size = port_buffer.port_buffer_size;
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
+ struct dcbnl_buffer *dcb_buffer)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_port_buffer port_buffer;
+ u8 old_prio2buffer[MLX5E_MAX_PRIORITY];
+ u32 *buffer_size = NULL;
+ u8 *prio2buffer = NULL;
+ u32 changed = 0;
+ int i, err;
+
+ if (!MLX5_BUFFER_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < DCBX_MAX_BUFFERS; i++)
+ mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]);
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+ mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]);
+
+ err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++) {
+ if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) {
+ changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER;
+ prio2buffer = dcb_buffer->prio2buffer;
+ break;
+ }
+ }
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) {
+ changed |= MLX5E_PORT_BUFFER_SIZE;
+ buffer_size = dcb_buffer->buffer_size;
+ break;
+ }
+ }
+
+ if (!changed)
+ return 0;
+
+ priv->dcbx.manual_buffer = true;
+ err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL,
+ buffer_size, prio2buffer);
+ return err;
+}
+
+const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
+ .ieee_getets = mlx5e_dcbnl_ieee_getets,
+ .ieee_setets = mlx5e_dcbnl_ieee_setets,
+ .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate,
+ .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate,
+ .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc,
+ .ieee_setapp = mlx5e_dcbnl_ieee_setapp,
+ .ieee_delapp = mlx5e_dcbnl_ieee_delapp,
+ .getdcbx = mlx5e_dcbnl_getdcbx,
+ .setdcbx = mlx5e_dcbnl_setdcbx,
+ .dcbnl_getbuffer = mlx5e_dcbnl_getbuffer,
+ .dcbnl_setbuffer = mlx5e_dcbnl_setbuffer,
+
+/* CEE interfaces */
+ .setall = mlx5e_dcbnl_setall,
+ .getstate = mlx5e_dcbnl_getstate,
+ .getpermhwaddr = mlx5e_dcbnl_getpermhwaddr,
+
+ .setpgtccfgtx = mlx5e_dcbnl_setpgtccfgtx,
+ .setpgbwgcfgtx = mlx5e_dcbnl_setpgbwgcfgtx,
+ .getpgtccfgtx = mlx5e_dcbnl_getpgtccfgtx,
+ .getpgbwgcfgtx = mlx5e_dcbnl_getpgbwgcfgtx,
+
+ .setpfccfg = mlx5e_dcbnl_setpfccfg,
+ .getpfccfg = mlx5e_dcbnl_getpfccfg,
+ .getcap = mlx5e_dcbnl_getcap,
+ .getnumtcs = mlx5e_dcbnl_getnumtcs,
+ .getpfcstate = mlx5e_dcbnl_getpfcstate,
+ .setpfcstate = mlx5e_dcbnl_setpfcstate,
+};
+
+static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
+ enum mlx5_dcbx_oper_mode *mode)
+{
+ u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+ *mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+
+ if (!mlx5_query_port_dcbx_param(priv->mdev, out))
+ *mode = MLX5_GET(dcbx_param, out, version_oper);
+
+ /* From driver's point of view, we only care if the mode
+ * is host (HOST) or non-host (AUTO)
+ */
+ if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ *mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+}
+
+static void mlx5e_ets_init(struct mlx5e_priv *priv)
+{
+ struct ieee_ets ets;
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return;
+
+ memset(&ets, 0, sizeof(ets));
+ ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+ for (i = 0; i < ets.ets_cap; i++) {
+ ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+ ets.prio_tc[i] = i;
+ }
+
+ if (ets.ets_cap > 1) {
+ /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+ ets.prio_tc[0] = 1;
+ ets.prio_tc[1] = 0;
+ }
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+ if (err)
+ netdev_err(priv->netdev,
+ "%s, Failed to init ETS: %d\n", __func__, err);
+}
+
+enum {
+ INIT,
+ DELETE,
+};
+
+static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action)
+{
+ struct dcb_app temp;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+ return;
+
+ if (!MLX5_DSCP_SUPPORTED(priv->mdev))
+ return;
+
+ /* No SEL_DSCP entry in non DSCP state */
+ if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP)
+ return;
+
+ temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+ for (i = 0; i < MLX5E_MAX_DSCP; i++) {
+ temp.protocol = i;
+ temp.priority = priv->dcbx_dp.dscp2prio[i];
+ if (action == INIT)
+ dcb_ieee_setapp(priv->netdev, &temp);
+ else
+ dcb_ieee_delapp(priv->netdev, &temp);
+ }
+
+ priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0;
+}
+
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv)
+{
+ mlx5e_dcbnl_dscp_app(priv, INIT);
+}
+
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv)
+{
+ mlx5e_dcbnl_dscp_app(priv, DELETE);
+}
+
+static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv,
+ struct mlx5e_params *params)
+{
+ params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev);
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP &&
+ params->tx_min_inline_mode == MLX5_INLINE_MODE_L2)
+ params->tx_min_inline_mode = MLX5_INLINE_MODE_IP;
+}
+
+static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channels new_channels = {};
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ /* Skip if tx_min_inline is the same */
+ if (new_channels.params.tx_min_inline_mode ==
+ priv->channels.params.tx_min_inline_mode)
+ goto out;
+
+ if (mlx5e_open_channels(priv, &new_channels))
+ goto out;
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+out:
+ mutex_unlock(&priv->state_lock);
+}
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
+{
+ int err;
+
+ err = mlx5_set_trust_state(priv->mdev, trust_state);
+ if (err)
+ return err;
+ priv->dcbx_dp.trust_state = trust_state;
+ mlx5e_trust_update_sq_inline_mode(priv);
+
+ return err;
+}
+
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
+{
+ int err;
+
+ err = mlx5_set_dscp2prio(priv->mdev, dscp, prio);
+ if (err)
+ return err;
+
+ priv->dcbx_dp.dscp2prio[dscp] = prio;
+ return err;
+}
+
+static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
+
+ if (!MLX5_DSCP_SUPPORTED(mdev))
+ return 0;
+
+ err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state);
+ if (err)
+ return err;
+
+ mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params);
+
+ err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
+{
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+ mlx5e_trust_initialize(priv);
+
+ if (!MLX5_CAP_GEN(priv->mdev, qos))
+ return;
+
+ if (MLX5_CAP_GEN(priv->mdev, dcbx))
+ mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode);
+
+ priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE |
+ DCB_CAP_DCBX_VER_IEEE;
+ if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ priv->dcbx.cap |= DCB_CAP_DCBX_HOST;
+
+ priv->dcbx.manual_buffer = false;
+ priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN;
+
+ mlx5e_ets_init(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
new file mode 100644
index 000000000..d67adf70a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/net_dim.h>
+#include "en.h"
+
+static void
+mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+ struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
+{
+ mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
+ dim->state = NET_DIM_START_MEASURE;
+}
+
+void mlx5e_rx_dim_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
+ struct net_dim_cq_moder cur_moder =
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+
+ mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
+}
+
+void mlx5e_tx_dim_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
+ struct net_dim_cq_moder cur_moder =
+ net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+
+ mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
new file mode 100644
index 000000000..a5c4e4f0f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -0,0 +1,1701 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "en/port.h"
+#include "lib/clock.h"
+
+void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, DRIVER_VERSION,
+ sizeof(drvinfo->version));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d (%.16s)",
+ fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
+ mdev->board_id);
+ strlcpy(drvinfo->bus_info, pci_name(mdev->pdev),
+ sizeof(drvinfo->bus_info));
+}
+
+static void mlx5e_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_drvinfo(priv, drvinfo);
+}
+
+struct ptys2ethtool_config {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
+};
+
+static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER];
+
+#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...) \
+ ({ \
+ struct ptys2ethtool_config *cfg; \
+ const unsigned int modes[] = { __VA_ARGS__ }; \
+ unsigned int i; \
+ cfg = &ptys2ethtool_table[reg_]; \
+ bitmap_zero(cfg->supported, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ bitmap_zero(cfg->advertised, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \
+ __set_bit(modes[i], cfg->supported); \
+ __set_bit(modes[i], cfg->advertised); \
+ } \
+ })
+
+void mlx5e_build_ptys2ethtool_map(void)
+{
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR,
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
+}
+
+int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
+{
+ int i, num_stats = 0;
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < mlx5e_num_stats_grps; i++)
+ num_stats += mlx5e_stats_grps[i].get_num_stats(priv);
+ return num_stats;
+ case ETH_SS_PRIV_FLAGS:
+ return ARRAY_SIZE(mlx5e_priv_flags);
+ case ETH_SS_TEST:
+ return mlx5e_self_test_num(priv);
+ /* fallthrough */
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_get_sset_count(priv, sset);
+}
+
+static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data)
+{
+ int i, idx = 0;
+
+ for (i = 0; i < mlx5e_num_stats_grps; i++)
+ idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx);
+}
+
+void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
+{
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_PRIV_FLAGS:
+ for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++)
+ strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]);
+ break;
+
+ case ETH_SS_TEST:
+ for (i = 0; i < mlx5e_self_test_num(priv); i++)
+ strcpy(data + i * ETH_GSTRING_LEN,
+ mlx5e_self_tests[i]);
+ break;
+
+ case ETH_SS_STATS:
+ mlx5e_fill_stats_strings(priv, data);
+ break;
+ }
+}
+
+static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_strings(priv, stringset, data);
+}
+
+void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
+ struct ethtool_stats *stats, u64 *data)
+{
+ int i, idx = 0;
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_update_stats(priv);
+ mutex_unlock(&priv->state_lock);
+
+ for (i = 0; i < mlx5e_num_stats_grps; i++)
+ idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx);
+}
+
+static void mlx5e_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
+}
+
+void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param)
+{
+ param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
+ param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
+ param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames;
+ param->tx_pending = 1 << priv->channels.params.log_sq_size;
+}
+
+static void mlx5e_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param);
+}
+
+int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_channels new_channels = {};
+ u8 log_rq_size;
+ u8 log_sq_size;
+ int err = 0;
+
+ if (param->rx_jumbo_pending) {
+ netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+ if (param->rx_mini_pending) {
+ netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
+ netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n",
+ __func__, param->rx_pending,
+ 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
+ return -EINVAL;
+ }
+
+ if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
+ netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n",
+ __func__, param->tx_pending,
+ 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ return -EINVAL;
+ }
+
+ log_rq_size = order_base_2(param->rx_pending);
+ log_sq_size = order_base_2(param->tx_pending);
+
+ if (log_rq_size == priv->channels.params.log_rq_mtu_frames &&
+ log_sq_size == priv->channels.params.log_sq_size)
+ return 0;
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.log_rq_mtu_frames = log_rq_size;
+ new_channels.params.log_sq_size = log_sq_size;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto unlock;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto unlock;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch)
+{
+ ch->max_combined = priv->profile->max_nch(priv->mdev);
+ ch->combined_count = priv->channels.params.num_channels;
+}
+
+static void mlx5e_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch)
+{
+ unsigned int count = ch->combined_count;
+ struct mlx5e_channels new_channels = {};
+ bool arfs_enabled;
+ int err = 0;
+
+ if (!count) {
+ netdev_info(priv->netdev, "%s: combined_count=0 not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (priv->channels.params.num_channels == count)
+ return 0;
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.num_channels = count;
+ if (!netif_is_rxfh_configured(priv->netdev))
+ mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, count);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ /* Create fresh channels with new parameters */
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE;
+ if (arfs_enabled)
+ mlx5e_arfs_disable(priv);
+
+ /* Switch to new channels, set new parameters and close old ones */
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+ if (arfs_enabled) {
+ err = mlx5e_arfs_enable(priv);
+ if (err)
+ netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
+ __func__, err);
+ }
+
+out:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_channels(priv, ch);
+}
+
+int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal)
+{
+ struct net_dim_cq_moder *rx_moder, *tx_moder;
+
+ if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
+ return -EOPNOTSUPP;
+
+ rx_moder = &priv->channels.params.rx_cq_moderation;
+ coal->rx_coalesce_usecs = rx_moder->usec;
+ coal->rx_max_coalesced_frames = rx_moder->pkts;
+ coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled;
+
+ tx_moder = &priv->channels.params.tx_cq_moderation;
+ coal->tx_coalesce_usecs = tx_moder->usec;
+ coal->tx_max_coalesced_frames = tx_moder->pkts;
+ coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled;
+
+ return 0;
+}
+
+static int mlx5e_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD
+#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT
+
+static void
+mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int tc;
+ int i;
+
+ for (i = 0; i < priv->channels.num; ++i) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ mlx5_core_modify_cq_moderation(mdev,
+ &c->sq[tc].cq.mcq,
+ coal->tx_coalesce_usecs,
+ coal->tx_max_coalesced_frames);
+ }
+
+ mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
+ coal->rx_coalesce_usecs,
+ coal->rx_max_coalesced_frames);
+ }
+}
+
+int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal)
+{
+ struct net_dim_cq_moder *rx_moder, *tx_moder;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ int err = 0;
+ bool reset;
+
+ if (!MLX5_CAP_GEN(mdev, cq_moderation))
+ return -EOPNOTSUPP;
+
+ if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) {
+ netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n",
+ __func__, MLX5E_MAX_COAL_TIME);
+ return -ERANGE;
+ }
+
+ if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES ||
+ coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) {
+ netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n",
+ __func__, MLX5E_MAX_COAL_FRAMES);
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ new_channels.params = priv->channels.params;
+
+ rx_moder = &new_channels.params.rx_cq_moderation;
+ rx_moder->usec = coal->rx_coalesce_usecs;
+ rx_moder->pkts = coal->rx_max_coalesced_frames;
+ new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+
+ tx_moder = &new_channels.params.tx_cq_moderation;
+ tx_moder->usec = coal->tx_coalesce_usecs;
+ tx_moder->pkts = coal->tx_max_coalesced_frames;
+ new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+ /* we are opened */
+
+ reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) ||
+ (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled);
+
+ if (!reset) {
+ mlx5e_set_priv_channels_coalesce(priv, coal);
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ /* open fresh channels with new coal parameters */
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
+static void ptys2ethtool_supported_link(unsigned long *supported_modes,
+ u32 eth_proto_cap)
+{
+ unsigned long proto_cap = eth_proto_cap;
+ int proto;
+
+ for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+ bitmap_or(supported_modes, supported_modes,
+ ptys2ethtool_table[proto].supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
+ u32 eth_proto_cap)
+{
+ unsigned long proto_cap = eth_proto_cap;
+ int proto;
+
+ for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+ bitmap_or(advertising_modes, advertising_modes,
+ ptys2ethtool_table[proto].advertised,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
+ u32 eth_proto_cap,
+ u8 connector_type)
+{
+ if (!connector_type || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
+ if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4)
+ | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4)
+ | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported,
+ FIBRE);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ FIBRE);
+ }
+
+ if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_KR)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4)
+ | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported,
+ Backplane);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Backplane);
+ }
+ return;
+ }
+
+ switch (connector_type) {
+ case MLX5E_PORT_TP:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, TP);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, TP);
+ break;
+ case MLX5E_PORT_AUI:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, AUI);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, AUI);
+ break;
+ case MLX5E_PORT_BNC:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, BNC);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, BNC);
+ break;
+ case MLX5E_PORT_MII:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, MII);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, MII);
+ break;
+ case MLX5E_PORT_FIBRE:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, FIBRE);
+ break;
+ case MLX5E_PORT_DA:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, Backplane);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Backplane);
+ break;
+ case MLX5E_PORT_NONE:
+ case MLX5E_PORT_OTHER:
+ default:
+ break;
+ }
+}
+
+static void get_speed_duplex(struct net_device *netdev,
+ u32 eth_proto_oper,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ u32 speed = SPEED_UNKNOWN;
+ u8 duplex = DUPLEX_UNKNOWN;
+
+ if (!netif_carrier_ok(netdev))
+ goto out;
+
+ speed = mlx5e_port_ptys2speed(eth_proto_oper);
+ if (!speed) {
+ speed = SPEED_UNKNOWN;
+ goto out;
+ }
+
+ duplex = DUPLEX_FULL;
+
+out:
+ link_ksettings->base.speed = speed;
+ link_ksettings->base.duplex = duplex;
+}
+
+static void get_supported(u32 eth_proto_cap,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *supported = link_ksettings->link_modes.supported;
+
+ ptys2ethtool_supported_link(supported, eth_proto_cap);
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
+}
+
+static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
+ u8 rx_pause,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *advertising = link_ksettings->link_modes.advertising;
+
+ ptys2ethtool_adver_link(advertising, eth_proto_cap);
+ if (rx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
+ if (tx_pause ^ rx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause);
+}
+
+static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = {
+ [MLX5E_PORT_UNKNOWN] = PORT_OTHER,
+ [MLX5E_PORT_NONE] = PORT_NONE,
+ [MLX5E_PORT_TP] = PORT_TP,
+ [MLX5E_PORT_AUI] = PORT_AUI,
+ [MLX5E_PORT_BNC] = PORT_BNC,
+ [MLX5E_PORT_MII] = PORT_MII,
+ [MLX5E_PORT_FIBRE] = PORT_FIBRE,
+ [MLX5E_PORT_DA] = PORT_DA,
+ [MLX5E_PORT_OTHER] = PORT_OTHER,
+ };
+
+static u8 get_connector_port(u32 eth_proto, u8 connector_type)
+{
+ if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
+ return ptys2connector_type[connector_type];
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
+ return PORT_FIBRE;
+ }
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_CR) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) {
+ return PORT_DA;
+ }
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_KR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) {
+ return PORT_NONE;
+ }
+
+ return PORT_OTHER;
+}
+
+static void get_lp_advertising(u32 eth_proto_lp,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
+
+ ptys2ethtool_adver_link(lp_advertising, eth_proto_lp);
+}
+
+static int mlx5e_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+ u32 rx_pause = 0;
+ u32 tx_pause = 0;
+ u32 eth_proto_cap;
+ u32 eth_proto_admin;
+ u32 eth_proto_lp;
+ u32 eth_proto_oper;
+ u8 an_disable_admin;
+ u8 an_status;
+ u8 connector_type;
+ int err;
+
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+ if (err) {
+ netdev_err(netdev, "%s: query port ptys failed: %d\n",
+ __func__, err);
+ goto err_query_ptys;
+ }
+
+ eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
+ eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
+ eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+ eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
+ an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+ an_status = MLX5_GET(ptys_reg, out, an_status);
+ connector_type = MLX5_GET(ptys_reg, out, connector_type);
+
+ mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ get_supported(eth_proto_cap, link_ksettings);
+ get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
+ get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
+
+ eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
+
+ link_ksettings->base.port = get_connector_port(eth_proto_oper,
+ connector_type);
+ ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin,
+ connector_type);
+ get_lp_advertising(eth_proto_lp, link_ksettings);
+
+ if (an_status == MLX5_AN_COMPLETE)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ lp_advertising, Autoneg);
+
+ link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE :
+ AUTONEG_ENABLE;
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+ Autoneg);
+ if (!an_disable_admin)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Autoneg);
+
+err_query_ptys:
+ return err;
+}
+
+static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
+{
+ u32 i, ptys_modes = 0;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (bitmap_intersects(ptys2ethtool_table[i].advertised,
+ link_modes,
+ __ETHTOOL_LINK_MODE_MASK_NBITS))
+ ptys_modes |= MLX5E_PROT_MASK(i);
+ }
+
+ return ptys_modes;
+}
+
+static int mlx5e_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 eth_proto_cap, eth_proto_admin;
+ bool an_changes = false;
+ u8 an_disable_admin;
+ u8 an_disable_cap;
+ bool an_disable;
+ u32 link_modes;
+ u8 an_status;
+ u32 speed;
+ int err;
+
+ speed = link_ksettings->base.speed;
+
+ link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
+ mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
+ mlx5e_port_speed2linkmodes(speed);
+
+ err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+ if (err) {
+ netdev_err(netdev, "%s: query port eth proto cap failed: %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ link_modes = link_modes & eth_proto_cap;
+ if (!link_modes) {
+ netdev_err(netdev, "%s: Not supported link mode(s) requested",
+ __func__);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
+ if (err) {
+ netdev_err(netdev, "%s: query port eth proto admin failed: %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status,
+ &an_disable_cap, &an_disable_admin);
+
+ an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE;
+ an_changes = ((!an_disable && an_disable_admin) ||
+ (an_disable && !an_disable_admin));
+
+ if (!an_changes && link_modes == eth_proto_admin)
+ goto out;
+
+ mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN);
+ mlx5_toggle_port_link(mdev);
+
+out:
+ return err;
+}
+
+static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return sizeof(priv->channels.params.toeplitz_hash_key);
+}
+
+static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
+{
+ return MLX5E_INDIR_RQT_SIZE;
+}
+
+static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (indir)
+ memcpy(indir, priv->channels.params.indirection_rqt,
+ sizeof(priv->channels.params.indirection_rqt));
+
+ if (key)
+ memcpy(key, priv->channels.params.toeplitz_hash_key,
+ sizeof(priv->channels.params.toeplitz_hash_key));
+
+ if (hfunc)
+ *hfunc = priv->channels.params.rss_hfunc;
+
+ return 0;
+}
+
+static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+ void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int ctxlen = MLX5_ST_SZ_BYTES(tirc);
+ int tt;
+
+ MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+ mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
+ }
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+ mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
+ }
+}
+
+static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ bool hash_changed = false;
+ void *in;
+
+ if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
+ (hfunc != ETH_RSS_HASH_XOR) &&
+ (hfunc != ETH_RSS_HASH_TOP))
+ return -EINVAL;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mutex_lock(&priv->state_lock);
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
+ hfunc != priv->channels.params.rss_hfunc) {
+ priv->channels.params.rss_hfunc = hfunc;
+ hash_changed = true;
+ }
+
+ if (indir) {
+ memcpy(priv->channels.params.indirection_rqt, indir,
+ sizeof(priv->channels.params.indirection_rqt));
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ u32 rqtn = priv->indir_rqt.rqtn;
+ struct mlx5e_redirect_rqt_param rrp = {
+ .is_rss = true,
+ {
+ .rss = {
+ .hfunc = priv->channels.params.rss_hfunc,
+ .channels = &priv->channels,
+ },
+ },
+ };
+
+ mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
+ }
+ }
+
+ if (key) {
+ memcpy(priv->channels.params.toeplitz_hash_key, key,
+ sizeof(priv->channels.params.toeplitz_hash_key));
+ hash_changed = hash_changed ||
+ priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP;
+ }
+
+ if (hash_changed)
+ mlx5e_modify_tirs_hash(priv, in, inlen);
+
+ mutex_unlock(&priv->state_lock);
+
+ kvfree(in);
+
+ return 0;
+}
+
+#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100
+#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000
+#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85
+#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80
+#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
+ max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
+ (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
+
+static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
+ u16 *pfc_prevention_tout)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);
+}
+
+static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev,
+ u16 pfc_preven)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 critical_tout;
+ u16 minor;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ?
+ MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC :
+ pfc_preven;
+
+ if (critical_tout != PFC_STORM_PREVENTION_DISABLE &&
+ (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC ||
+ critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) {
+ netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n",
+ __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC,
+ MLX5E_PFC_PREVEN_TOUT_MAX_MSEC);
+ return -EINVAL;
+ }
+
+ minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout);
+ return mlx5_set_port_stall_watermark(mdev, critical_tout,
+ minor);
+}
+
+static int mlx5e_get_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ void *data)
+{
+ int err;
+
+ switch (tuna->id) {
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_get_pfc_prevention_tout(dev, data);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int mlx5e_set_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+ switch (tuna->id) {
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static void mlx5e_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause,
+ &pauseparam->tx_pause);
+ if (err) {
+ netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
+ __func__, err);
+ }
+}
+
+static int mlx5e_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EOPNOTSUPP;
+
+ if (pauseparam->autoneg)
+ return -EINVAL;
+
+ err = mlx5_set_port_pause(mdev,
+ pauseparam->rx_pause ? 1 : 0,
+ pauseparam->tx_pause ? 1 : 0);
+ if (err) {
+ netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
+ __func__, err);
+ }
+
+ return err;
+}
+
+int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ info->phc_index = mlx5_clock_get_ptp_index(mdev);
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
+ info->phc_index == -1)
+ return 0;
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
+static int mlx5e_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_get_ts_info(priv, info);
+}
+
+static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev)
+{
+ __u32 ret = 0;
+
+ if (MLX5_CAP_GEN(mdev, wol_g))
+ ret |= WAKE_MAGIC;
+
+ if (MLX5_CAP_GEN(mdev, wol_s))
+ ret |= WAKE_MAGICSECURE;
+
+ if (MLX5_CAP_GEN(mdev, wol_a))
+ ret |= WAKE_ARP;
+
+ if (MLX5_CAP_GEN(mdev, wol_b))
+ ret |= WAKE_BCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_m))
+ ret |= WAKE_MCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_u))
+ ret |= WAKE_UCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_p))
+ ret |= WAKE_PHY;
+
+ return ret;
+}
+
+static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode)
+{
+ __u32 ret = 0;
+
+ if (mode & MLX5_WOL_MAGIC)
+ ret |= WAKE_MAGIC;
+
+ if (mode & MLX5_WOL_SECURED_MAGIC)
+ ret |= WAKE_MAGICSECURE;
+
+ if (mode & MLX5_WOL_ARP)
+ ret |= WAKE_ARP;
+
+ if (mode & MLX5_WOL_BROADCAST)
+ ret |= WAKE_BCAST;
+
+ if (mode & MLX5_WOL_MULTICAST)
+ ret |= WAKE_MCAST;
+
+ if (mode & MLX5_WOL_UNICAST)
+ ret |= WAKE_UCAST;
+
+ if (mode & MLX5_WOL_PHY_ACTIVITY)
+ ret |= WAKE_PHY;
+
+ return ret;
+}
+
+static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode)
+{
+ u8 ret = 0;
+
+ if (mode & WAKE_MAGIC)
+ ret |= MLX5_WOL_MAGIC;
+
+ if (mode & WAKE_MAGICSECURE)
+ ret |= MLX5_WOL_SECURED_MAGIC;
+
+ if (mode & WAKE_ARP)
+ ret |= MLX5_WOL_ARP;
+
+ if (mode & WAKE_BCAST)
+ ret |= MLX5_WOL_BROADCAST;
+
+ if (mode & WAKE_MCAST)
+ ret |= MLX5_WOL_MULTICAST;
+
+ if (mode & WAKE_UCAST)
+ ret |= MLX5_WOL_UNICAST;
+
+ if (mode & WAKE_PHY)
+ ret |= MLX5_WOL_PHY_ACTIVITY;
+
+ return ret;
+}
+
+static void mlx5e_get_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 mlx5_wol_mode;
+ int err;
+
+ memset(wol, 0, sizeof(*wol));
+
+ wol->supported = mlx5e_get_wol_supported(mdev);
+ if (!wol->supported)
+ return;
+
+ err = mlx5_query_port_wol(mdev, &mlx5_wol_mode);
+ if (err)
+ return;
+
+ wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode);
+}
+
+static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ __u32 wol_supported = mlx5e_get_wol_supported(mdev);
+ u32 mlx5_wol_mode;
+
+ if (!wol_supported)
+ return -EOPNOTSUPP;
+
+ if (wol->wolopts & ~wol_supported)
+ return -EINVAL;
+
+ mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts);
+
+ return mlx5_set_port_wol(mdev, mlx5_wol_mode);
+}
+
+static u32 mlx5e_get_msglevel(struct net_device *dev)
+{
+ return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel;
+}
+
+static void mlx5e_set_msglevel(struct net_device *dev, u32 val)
+{
+ ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val;
+}
+
+static int mlx5e_set_phys_id(struct net_device *dev,
+ enum ethtool_phys_id_state state)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 beacon_duration;
+
+ if (!MLX5_CAP_GEN(mdev, beacon_led))
+ return -EOPNOTSUPP;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ beacon_duration = MLX5_BEACON_DURATION_INF;
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ beacon_duration = MLX5_BEACON_DURATION_OFF;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return mlx5_set_port_beacon(mdev, beacon_duration);
+}
+
+static int mlx5e_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *dev = priv->mdev;
+ int size_read = 0;
+ u8 data[4];
+
+ size_read = mlx5_query_module_eeprom(dev, 0, 2, data);
+ if (size_read < 2)
+ return -EIO;
+
+ /* data[0] = identifier byte */
+ switch (data[0]) {
+ case MLX5_MODULE_ID_QSFP:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+ /* data[1] = revision id */
+ if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) {
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ }
+ break;
+ case MLX5_MODULE_ID_SFP:
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
+ __func__, data[0]);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5e_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int offset = ee->offset;
+ int size_read;
+ int i = 0;
+
+ if (!ee->len)
+ return -EINVAL;
+
+ memset(data, 0, ee->len);
+
+ while (i < ee->len) {
+ size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i,
+ data + i);
+
+ if (!size_read)
+ /* Done reading */
+ return 0;
+
+ if (size_read < 0) {
+ netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
+ __func__, size_read);
+ return size_read;
+ }
+
+ i += size_read;
+ offset += size_read;
+ }
+
+ return 0;
+}
+
+typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+
+static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
+ bool is_rx_cq)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ bool mode_changed;
+ u8 cq_period_mode, current_cq_period_mode;
+ int err = 0;
+
+ cq_period_mode = enable ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ current_cq_period_mode = is_rx_cq ?
+ priv->channels.params.rx_cq_moderation.cq_period_mode :
+ priv->channels.params.tx_cq_moderation.cq_period_mode;
+ mode_changed = cq_period_mode != current_cq_period_mode;
+
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
+ !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
+ return -EOPNOTSUPP;
+
+ if (!mode_changed)
+ return 0;
+
+ new_channels.params = priv->channels.params;
+ if (is_rx_cq)
+ mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode);
+ else
+ mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ return 0;
+}
+
+static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+ return set_pflag_cqe_based_moder(netdev, enable, false);
+}
+
+static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+ return set_pflag_cqe_based_moder(netdev, enable, true);
+}
+
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val)
+{
+ bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS);
+ struct mlx5e_channels new_channels = {};
+ int err = 0;
+
+ if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
+ return new_val ? -EOPNOTSUPP : 0;
+
+ if (curr_val == new_val)
+ return 0;
+
+ new_channels.params = priv->channels.params;
+ MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+ MLX5E_GET_PFLAG(&priv->channels.params,
+ MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
+ return 0;
+}
+
+static int set_pflag_rx_cqe_compress(struct net_device *netdev,
+ bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, cqe_compression))
+ return -EOPNOTSUPP;
+
+ if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
+ netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
+ return -EINVAL;
+ }
+
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, enable);
+ if (err)
+ return err;
+
+ priv->channels.params.rx_cqe_compress_def = enable;
+
+ return 0;
+}
+
+static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ int err;
+
+ if (enable) {
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
+ return -EOPNOTSUPP;
+ if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
+ return -EINVAL;
+ } else if (priv->channels.params.lro_en) {
+ netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
+ return -EINVAL;
+ }
+
+ new_channels.params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
+ mlx5e_set_rq_type(mdev, &new_channels.params);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ return 0;
+}
+
+static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels *channels = &priv->channels;
+ struct mlx5e_channel *c;
+ int i;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
+ priv->channels.params.xdp_prog)
+ return 0;
+
+ for (i = 0; i < channels->num; i++) {
+ c = channels->c[i];
+ if (enable)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ else
+ __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ }
+
+ return 0;
+}
+
+static int mlx5e_handle_pflag(struct net_device *netdev,
+ u32 wanted_flags,
+ enum mlx5e_priv_flag flag,
+ mlx5e_pflag_handler pflag_handler)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ bool enable = !!(wanted_flags & flag);
+ u32 changes = wanted_flags ^ priv->channels.params.pflags;
+ int err;
+
+ if (!(changes & flag))
+ return 0;
+
+ err = pflag_handler(netdev, enable);
+ if (err) {
+ netdev_err(netdev, "%s private flag 0x%x failed err %d\n",
+ enable ? "Enable" : "Disable", flag, err);
+ return err;
+ }
+
+ MLX5E_SET_PFLAG(&priv->channels.params, flag, enable);
+ return 0;
+}
+
+static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ set_pflag_rx_cqe_based_moder);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ set_pflag_tx_cqe_based_moder);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_CQE_COMPRESS,
+ set_pflag_rx_cqe_compress);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_STRIDING_RQ,
+ set_pflag_rx_striding_rq);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+ set_pflag_rx_no_csum_complete);
+
+out:
+ mutex_unlock(&priv->state_lock);
+
+ /* Need to fix some features.. */
+ netdev_update_features(netdev);
+
+ return err;
+}
+
+static u32 mlx5e_get_priv_flags(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return priv->channels.params.pflags;
+}
+
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->netdev;
+ const struct firmware *fw;
+ int err;
+
+ if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+ return -EOPNOTSUPP;
+
+ err = request_firmware_direct(&fw, flash->data, &dev->dev);
+ if (err)
+ return err;
+
+ dev_hold(dev);
+ rtnl_unlock();
+
+ err = mlx5_firmware_flash(mdev, fw);
+ release_firmware(fw);
+
+ rtnl_lock();
+ dev_put(dev);
+ return err;
+}
+
+static int mlx5e_flash_device(struct net_device *dev,
+ struct ethtool_flash *flash)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_flash_device(priv, flash);
+}
+
+#ifndef CONFIG_MLX5_EN_RXNFC
+/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS
+ * otherwise this function will be defined from en_fs_ethtool.c
+ */
+static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (info->cmd != ETHTOOL_GRXRINGS)
+ return -EOPNOTSUPP;
+ /* ring_count is needed by ethtool -x */
+ info->data = priv->channels.params.num_channels;
+ return 0;
+}
+#endif
+
+const struct ethtool_ops mlx5e_ethtool_ops = {
+ .get_drvinfo = mlx5e_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_get_strings,
+ .get_sset_count = mlx5e_get_sset_count,
+ .get_ethtool_stats = mlx5e_get_ethtool_stats,
+ .get_ringparam = mlx5e_get_ringparam,
+ .set_ringparam = mlx5e_set_ringparam,
+ .get_channels = mlx5e_get_channels,
+ .set_channels = mlx5e_set_channels,
+ .get_coalesce = mlx5e_get_coalesce,
+ .set_coalesce = mlx5e_set_coalesce,
+ .get_link_ksettings = mlx5e_get_link_ksettings,
+ .set_link_ksettings = mlx5e_set_link_ksettings,
+ .get_rxfh_key_size = mlx5e_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size,
+ .get_rxfh = mlx5e_get_rxfh,
+ .set_rxfh = mlx5e_set_rxfh,
+ .get_rxnfc = mlx5e_get_rxnfc,
+#ifdef CONFIG_MLX5_EN_RXNFC
+ .set_rxnfc = mlx5e_set_rxnfc,
+#endif
+ .flash_device = mlx5e_flash_device,
+ .get_tunable = mlx5e_get_tunable,
+ .set_tunable = mlx5e_set_tunable,
+ .get_pauseparam = mlx5e_get_pauseparam,
+ .set_pauseparam = mlx5e_set_pauseparam,
+ .get_ts_info = mlx5e_get_ts_info,
+ .set_phys_id = mlx5e_set_phys_id,
+ .get_wol = mlx5e_get_wol,
+ .set_wol = mlx5e_set_wol,
+ .get_module_info = mlx5e_get_module_info,
+ .get_module_eeprom = mlx5e_get_module_eeprom,
+ .get_priv_flags = mlx5e_get_priv_flags,
+ .set_priv_flags = mlx5e_set_priv_flags,
+ .self_test = mlx5e_self_test,
+ .get_msglevel = mlx5e_get_msglevel,
+ .set_msglevel = mlx5e_set_msglevel,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
new file mode 100644
index 000000000..c6eea6b6b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -0,0 +1,1576 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include "en.h"
+#include "lib/mpfs.h"
+
+static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai, int type);
+static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai);
+
+enum {
+ MLX5E_FULLMATCH = 0,
+ MLX5E_ALLMULTI = 1,
+ MLX5E_PROMISC = 2,
+};
+
+enum {
+ MLX5E_UC = 0,
+ MLX5E_MC_IPV4 = 1,
+ MLX5E_MC_IPV6 = 2,
+ MLX5E_MC_OTHER = 3,
+};
+
+enum {
+ MLX5E_ACTION_NONE = 0,
+ MLX5E_ACTION_ADD = 1,
+ MLX5E_ACTION_DEL = 2,
+};
+
+struct mlx5e_l2_hash_node {
+ struct hlist_node hlist;
+ u8 action;
+ struct mlx5e_l2_rule ai;
+ bool mpfs;
+};
+
+static inline int mlx5e_hash_l2(u8 *addr)
+{
+ return addr[5];
+}
+
+static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr)
+{
+ struct mlx5e_l2_hash_node *hn;
+ int ix = mlx5e_hash_l2(addr);
+ int found = 0;
+
+ hlist_for_each_entry(hn, &hash[ix], hlist)
+ if (ether_addr_equal_64bits(hn->ai.addr, addr)) {
+ found = 1;
+ break;
+ }
+
+ if (found) {
+ hn->action = MLX5E_ACTION_NONE;
+ return;
+ }
+
+ hn = kzalloc(sizeof(*hn), GFP_ATOMIC);
+ if (!hn)
+ return;
+
+ ether_addr_copy(hn->ai.addr, addr);
+ hn->action = MLX5E_ACTION_ADD;
+
+ hlist_add_head(&hn->hlist, &hash[ix]);
+}
+
+static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn)
+{
+ hlist_del(&hn->hlist);
+ kfree(hn);
+}
+
+static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
+{
+ struct net_device *ndev = priv->netdev;
+ int max_list_size;
+ int list_size;
+ u16 *vlans;
+ int vlan;
+ int err;
+ int i;
+
+ list_size = 0;
+ for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID)
+ list_size++;
+
+ max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
+
+ if (list_size > max_list_size) {
+ netdev_warn(ndev,
+ "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
+ list_size = max_list_size;
+ }
+
+ vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+ if (!vlans)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+ if (i >= list_size)
+ break;
+ vlans[i++] = vlan;
+ }
+
+ err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size);
+ if (err)
+ netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
+ err);
+
+ kfree(vlans);
+ return err;
+}
+
+enum mlx5e_vlan_rule_type {
+ MLX5E_VLAN_RULE_TYPE_UNTAGGED,
+ MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID,
+ MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID,
+};
+
+static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type,
+ u16 vid, struct mlx5_flow_spec *spec)
+{
+ struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rule_p;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ int err = 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = priv->fs.l2.ft.t;
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ /* cvlan_tag enabled in match criteria and
+ * disabled in match value means both S & C tags
+ * don't exist (untagged of both)
+ */
+ rule_p = &priv->fs.vlan.untagged_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ rule_p = &priv->fs.vlan.any_cvlan_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ rule_p = &priv->fs.vlan.any_svlan_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+ rule_p = &priv->fs.vlan.active_svlans_rule[vid];
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+ vid);
+ break;
+ default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */
+ rule_p = &priv->fs.vlan.active_cvlans_rule[vid];
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+ vid);
+ break;
+ }
+
+ if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
+ return 0;
+
+ *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+
+ if (IS_ERR(*rule_p)) {
+ err = PTR_ERR(*rule_p);
+ *rule_p = NULL;
+ netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ }
+
+ return err;
+}
+
+static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID)
+ mlx5e_vport_context_update_vlans(priv);
+
+ err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec);
+
+ kvfree(spec);
+
+ return err;
+}
+
+static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ if (priv->fs.vlan.untagged_rule) {
+ mlx5_del_flow_rules(priv->fs.vlan.untagged_rule);
+ priv->fs.vlan.untagged_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ if (priv->fs.vlan.any_cvlan_rule) {
+ mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule);
+ priv->fs.vlan.any_cvlan_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ if (priv->fs.vlan.any_svlan_rule) {
+ mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule);
+ priv->fs.vlan.any_svlan_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+ if (priv->fs.vlan.active_svlans_rule[vid]) {
+ mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]);
+ priv->fs.vlan.active_svlans_rule[vid] = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
+ if (priv->fs.vlan.active_cvlans_rule[vid]) {
+ mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]);
+ priv->fs.vlan.active_cvlans_rule[vid] = NULL;
+ }
+ mlx5e_vport_context_update_vlans(priv);
+ break;
+ }
+}
+
+static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv)
+{
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ if (err)
+ return err;
+
+ return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
+{
+ if (!priv->fs.vlan.cvlan_filter_disabled)
+ return;
+
+ priv->fs.vlan.cvlan_filter_disabled = false;
+ if (priv->netdev->flags & IFF_PROMISC)
+ return;
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+}
+
+void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv)
+{
+ if (priv->fs.vlan.cvlan_filter_disabled)
+ return;
+
+ priv->fs.vlan.cvlan_filter_disabled = true;
+ if (priv->netdev->flags & IFF_PROMISC)
+ return;
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+}
+
+static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid)
+{
+ int err;
+
+ set_bit(vid, priv->fs.vlan.active_cvlans);
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ if (err)
+ clear_bit(vid, priv->fs.vlan.active_cvlans);
+
+ return err;
+}
+
+static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
+{
+ struct net_device *netdev = priv->netdev;
+ int err;
+
+ set_bit(vid, priv->fs.vlan.active_svlans);
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ if (err) {
+ clear_bit(vid, priv->fs.vlan.active_svlans);
+ return err;
+ }
+
+ /* Need to fix some features.. */
+ netdev_update_features(netdev);
+ return err;
+}
+
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (be16_to_cpu(proto) == ETH_P_8021Q)
+ return mlx5e_vlan_rx_add_cvid(priv, vid);
+ else if (be16_to_cpu(proto) == ETH_P_8021AD)
+ return mlx5e_vlan_rx_add_svid(priv, vid);
+
+ return -EOPNOTSUPP;
+}
+
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (be16_to_cpu(proto) == ETH_P_8021Q) {
+ clear_bit(vid, priv->fs.vlan.active_cvlans);
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ } else if (be16_to_cpu(proto) == ETH_P_8021AD) {
+ clear_bit(vid, priv->fs.vlan.active_svlans);
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ netdev_update_features(dev);
+ }
+
+ return 0;
+}
+
+static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
+{
+ int i;
+
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ }
+
+ for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+ if (priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_add_any_vid_rules(priv);
+}
+
+static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+{
+ int i;
+
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ }
+
+ for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+ WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
+
+ /* must be called after DESTROY bit is set and
+ * set_rx_mode is called and flushed
+ */
+ if (priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_del_any_vid_rules(priv);
+}
+
+#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
+
+static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
+ struct mlx5e_l2_hash_node *hn)
+{
+ u8 action = hn->action;
+ u8 mac_addr[ETH_ALEN];
+ int l2_err = 0;
+
+ ether_addr_copy(mac_addr, hn->ai.addr);
+
+ switch (action) {
+ case MLX5E_ACTION_ADD:
+ mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
+ if (!is_multicast_ether_addr(mac_addr)) {
+ l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr);
+ hn->mpfs = !l2_err;
+ }
+ hn->action = MLX5E_ACTION_NONE;
+ break;
+
+ case MLX5E_ACTION_DEL:
+ if (!is_multicast_ether_addr(mac_addr) && hn->mpfs)
+ l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr);
+ mlx5e_del_l2_flow_rule(priv, &hn->ai);
+ mlx5e_del_l2_from_hash(hn);
+ break;
+ }
+
+ if (l2_err)
+ netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
+ action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err);
+}
+
+static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct netdev_hw_addr *ha;
+
+ netif_addr_lock_bh(netdev);
+
+ mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc,
+ priv->netdev->dev_addr);
+
+ netdev_for_each_uc_addr(ha, netdev)
+ mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr);
+
+ netdev_for_each_mc_addr(ha, netdev)
+ mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr);
+
+ netif_addr_unlock_bh(netdev);
+}
+
+static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
+ u8 addr_array[][ETH_ALEN], int size)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct net_device *ndev = priv->netdev;
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int i = 0;
+ int hi;
+
+ addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+
+ if (is_uc) /* Make sure our own address is pushed first */
+ ether_addr_copy(addr_array[i++], ndev->dev_addr);
+ else if (priv->fs.l2.broadcast_enabled)
+ ether_addr_copy(addr_array[i++], ndev->broadcast);
+
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
+ if (ether_addr_equal(ndev->dev_addr, hn->ai.addr))
+ continue;
+ if (i >= size)
+ break;
+ ether_addr_copy(addr_array[i++], hn->ai.addr);
+ }
+}
+
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
+ int list_type)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct mlx5e_l2_hash_node *hn;
+ u8 (*addr_array)[ETH_ALEN] = NULL;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int max_size;
+ int size;
+ int err;
+ int hi;
+
+ size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0);
+ max_size = is_uc ?
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list);
+
+ addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
+ size++;
+
+ if (size > max_size) {
+ netdev_warn(priv->netdev,
+ "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
+ size = max_size;
+ }
+
+ if (size) {
+ addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!addr_array) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mlx5e_fill_addr_array(priv, list_type, addr_array, size);
+ }
+
+ err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size);
+out:
+ if (err)
+ netdev_err(priv->netdev,
+ "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
+ kfree(addr_array);
+}
+
+static void mlx5e_vport_context_update(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_table *ea = &priv->fs.l2;
+
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC);
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC);
+ mlx5_modify_nic_vport_promisc(priv->mdev, 0,
+ ea->allmulti_enabled,
+ ea->promisc_enabled);
+}
+
+static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
+ mlx5e_execute_l2_action(priv, hn);
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
+ mlx5e_execute_l2_action(priv, hn);
+}
+
+static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
+ hn->action = MLX5E_ACTION_DEL;
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
+ hn->action = MLX5E_ACTION_DEL;
+
+ if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state))
+ mlx5e_sync_netdev_addr(priv);
+
+ mlx5e_apply_netdev_addr(priv);
+}
+
+void mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ set_rx_mode_work);
+
+ struct mlx5e_l2_table *ea = &priv->fs.l2;
+ struct net_device *ndev = priv->netdev;
+
+ bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC);
+ bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI);
+ bool broadcast_enabled = rx_mode_enable;
+
+ bool enable_promisc = !ea->promisc_enabled && promisc_enabled;
+ bool disable_promisc = ea->promisc_enabled && !promisc_enabled;
+ bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled;
+ bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled;
+ bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled;
+ bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled;
+
+ if (enable_promisc) {
+ if (!priv->channels.params.vlan_strip_disable)
+ netdev_warn_once(ndev,
+ "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
+ mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC);
+ if (!priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_add_any_vid_rules(priv);
+ }
+ if (enable_allmulti)
+ mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
+ if (enable_broadcast)
+ mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
+
+ mlx5e_handle_netdev_addr(priv);
+
+ if (disable_broadcast)
+ mlx5e_del_l2_flow_rule(priv, &ea->broadcast);
+ if (disable_allmulti)
+ mlx5e_del_l2_flow_rule(priv, &ea->allmulti);
+ if (disable_promisc) {
+ if (!priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_del_any_vid_rules(priv);
+ mlx5e_del_l2_flow_rule(priv, &ea->promisc);
+ }
+
+ ea->promisc_enabled = promisc_enabled;
+ ea->allmulti_enabled = allmulti_enabled;
+ ea->broadcast_enabled = broadcast_enabled;
+
+ mlx5e_vport_context_update(priv);
+}
+
+static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
+{
+ int i;
+
+ for (i = ft->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ft->g[i]))
+ mlx5_destroy_flow_group(ft->g[i]);
+ ft->g[i] = NULL;
+ }
+ ft->num_groups = 0;
+}
+
+void mlx5e_init_l2_addr(struct mlx5e_priv *priv)
+{
+ ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast);
+}
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
+{
+ mlx5e_destroy_groups(ft);
+ kfree(ft->g);
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+}
+
+static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->rules[i])) {
+ mlx5_del_flow_rules(ttc->rules[i]);
+ ttc->rules[i] = NULL;
+ }
+ }
+
+ for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+ mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+ ttc->tunnel_rules[i] = NULL;
+ }
+ }
+}
+
+struct mlx5e_etype_proto {
+ u16 etype;
+ u8 proto;
+};
+
+static struct mlx5e_etype_proto ttc_rules[] = {
+ [MLX5E_TT_IPV4_TCP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5E_TT_IPV6_TCP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5E_TT_IPV4_UDP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5E_TT_IPV6_UDP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5E_TT_IPV4_IPSEC_AH] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5E_TT_IPV6_IPSEC_AH] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5E_TT_IPV4_IPSEC_ESP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5E_TT_IPV6_IPSEC_ESP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5E_TT_IPV4] = {
+ .etype = ETH_P_IP,
+ .proto = 0,
+ },
+ [MLX5E_TT_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = 0,
+ },
+ [MLX5E_TT_ANY] = {
+ .etype = 0,
+ .proto = 0,
+ },
+};
+
+static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
+ [MLX5E_TT_IPV4_GRE] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5E_TT_IPV6_GRE] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_GRE,
+ },
+};
+
+static u8 mlx5e_etype_to_ipv(u16 ethertype)
+{
+ if (ethertype == ETH_P_IP)
+ return 4;
+
+ if (ethertype == ETH_P_IPV6)
+ return 6;
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype,
+ u8 proto)
+{
+ int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+ }
+
+ ipv = mlx5e_etype_to_ipv(etype);
+ if (match_ipv_outer && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+ } else if (etype) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
+ struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rules;
+ struct mlx5_flow_table *ft;
+ int tt;
+ int err;
+
+ ft = ttc->ft.t;
+ rules = ttc->rules;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+ if (tt == MLX5E_TT_ANY)
+ dest.tir_num = params->any_tt_tirn;
+ else
+ dest.tir_num = params->indir_tirn[tt];
+ rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
+ if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ rules = ttc->tunnel_rules;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = params->inner_ttc->ft.t;
+ for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
+ rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
+ ttc_tunnel_rules[tt].etype,
+ ttc_tunnel_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ err = PTR_ERR(rules[tt]);
+ rules[tt] = NULL;
+ mlx5e_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+#define MLX5E_TTC_NUM_GROUPS 3
+#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
+#define MLX5E_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
+ MLX5E_TTC_GROUP2_SIZE +\
+ MLX5E_TTC_GROUP3_SIZE)
+
+#define MLX5E_INNER_TTC_NUM_GROUPS 3
+#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
+ MLX5E_INNER_TTC_GROUP2_SIZE +\
+ MLX5E_INNER_TTC_GROUP3_SIZE)
+
+static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
+ bool use_ipv)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
+ sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ ft->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ if (use_ipv)
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype, u8 proto)
+{
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ipv = mlx5e_etype_to_ipv(etype);
+ if (etype && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+ }
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
+ struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rules;
+ struct mlx5_flow_table *ft;
+ int err;
+ int tt;
+
+ ft = ttc->ft.t;
+ rules = ttc->rules;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+ if (tt == MLX5E_TT_ANY)
+ dest.tir_num = params->any_tt_tirn;
+ else
+ dest.tir_num = params->indir_tirn[tt];
+
+ rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ err = PTR_ERR(rules[tt]);
+ rules[tt] = NULL;
+ mlx5e_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ ft->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params)
+{
+ ttc_params->any_tt_tirn = priv->direct_tir[0].tirn;
+ ttc_params->inner_ttc = &priv->fs.inner_ttc;
+}
+
+void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+
+ ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+ ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_NIC_PRIO;
+}
+
+void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+
+ ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+ ft_attr->level = MLX5E_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_NIC_PRIO;
+}
+
+int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int err;
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_inner_ttc_table_groups(ttc);
+ if (err)
+ goto err;
+
+ err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc)
+{
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ mlx5e_cleanup_ttc_rules(ttc);
+ mlx5e_destroy_flow_table(&ttc->ft);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc)
+{
+ mlx5e_cleanup_ttc_rules(ttc);
+ mlx5e_destroy_flow_table(&ttc->ft);
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int err;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
+ if (err)
+ goto err;
+
+ err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai)
+{
+ if (!IS_ERR_OR_NULL(ai->rule)) {
+ mlx5_del_flow_rules(ai->rule);
+ ai->rule = NULL;
+ }
+}
+
+static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai, int type)
+{
+ struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 *mc_dmac;
+ u8 *mv_dmac;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dmac_47_16);
+ mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dmac_47_16);
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = priv->fs.ttc.ft.t;
+
+ switch (type) {
+ case MLX5E_FULLMATCH:
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ eth_broadcast_addr(mc_dmac);
+ ether_addr_copy(mv_dmac, ai->addr);
+ break;
+
+ case MLX5E_ALLMULTI:
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ mc_dmac[0] = 0x01;
+ mv_dmac[0] = 0x01;
+ break;
+
+ case MLX5E_PROMISC:
+ break;
+ }
+
+ ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(ai->rule)) {
+ netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
+ __func__, mv_dmac);
+ err = PTR_ERR(ai->rule);
+ ai->rule = NULL;
+ }
+
+ kvfree(spec);
+
+ return err;
+}
+
+#define MLX5E_NUM_L2_GROUPS 3
+#define MLX5E_L2_GROUP1_SIZE BIT(0)
+#define MLX5E_L2_GROUP2_SIZE BIT(15)
+#define MLX5E_L2_GROUP3_SIZE BIT(0)
+#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\
+ MLX5E_L2_GROUP2_SIZE +\
+ MLX5E_L2_GROUP3_SIZE)
+static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &l2_table->ft;
+ int ix = 0;
+ u8 *mc_dmac;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ mc_dmac = MLX5_ADDR_OF(fte_match_param, mc,
+ outer_headers.dmac_47_16);
+ /* Flow Group for promiscuous */
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ /* Flow Group for full match */
+ eth_broadcast_addr(mc_dmac);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ /* Flow Group for allmulti */
+ eth_zero_addr(mc_dmac);
+ mc_dmac[0] = 0x01;
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err_destroy_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+ kvfree(in);
+ kfree(ft->g);
+
+ return err;
+}
+
+static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_flow_table(&priv->fs.l2.ft);
+}
+
+static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_table *l2_table = &priv->fs.l2;
+ struct mlx5e_flow_table *ft = &l2_table->ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_L2_TABLE_SIZE;
+ ft_attr.level = MLX5E_L2_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_l2_table_groups(l2_table);
+ if (err)
+ goto err_destroy_flow_table;
+
+ return 0;
+
+err_destroy_flow_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return err;
+}
+
+#define MLX5E_NUM_VLAN_GROUPS 4
+#define MLX5E_VLAN_GROUP0_SIZE BIT(12)
+#define MLX5E_VLAN_GROUP1_SIZE BIT(12)
+#define MLX5E_VLAN_GROUP2_SIZE BIT(1)
+#define MLX5E_VLAN_GROUP3_SIZE BIT(0)
+#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\
+ MLX5E_VLAN_GROUP1_SIZE +\
+ MLX5E_VLAN_GROUP2_SIZE +\
+ MLX5E_VLAN_GROUP3_SIZE)
+
+static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in,
+ int inlen)
+{
+ int err;
+ int ix = 0;
+ u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP0_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ return 0;
+
+err_destroy_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+
+ return err;
+}
+
+static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft)
+{
+ u32 *in;
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = __mlx5e_create_vlan_table_groups(ft, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_flow_table *ft = &priv->fs.vlan.ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
+ ft_attr.level = MLX5E_VLAN_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+ ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g) {
+ err = -ENOMEM;
+ goto err_destroy_vlan_table;
+ }
+
+ err = mlx5e_create_vlan_table_groups(ft);
+ if (err)
+ goto err_free_g;
+
+ mlx5e_add_vlan_rules(priv);
+
+ return 0;
+
+err_free_g:
+ kfree(ft->g);
+err_destroy_vlan_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return err;
+}
+
+static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
+{
+ mlx5e_del_vlan_rules(priv);
+ mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
+}
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+{
+ struct ttc_params ttc_params = {};
+ int tt, err;
+
+ priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+
+ if (!priv->fs.ns)
+ return -EOPNOTSUPP;
+
+ err = mlx5e_arfs_create_tables(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
+ err);
+ priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ }
+
+ mlx5e_set_ttc_basic_params(priv, &ttc_params);
+ mlx5e_set_inner_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
+
+ err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+ err);
+ goto err_destroy_arfs_tables;
+ }
+
+ mlx5e_set_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
+
+ err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
+ err);
+ goto err_destroy_inner_ttc_table;
+ }
+
+ err = mlx5e_create_l2_table(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n",
+ err);
+ goto err_destroy_ttc_table;
+ }
+
+ err = mlx5e_create_vlan_table(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n",
+ err);
+ goto err_destroy_l2_table;
+ }
+
+ mlx5e_ethtool_init_steering(priv);
+
+ return 0;
+
+err_destroy_l2_table:
+ mlx5e_destroy_l2_table(priv);
+err_destroy_ttc_table:
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+err_destroy_inner_ttc_table:
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+err_destroy_arfs_tables:
+ mlx5e_arfs_destroy_tables(priv);
+
+ return err;
+}
+
+void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_vlan_table(priv);
+ mlx5e_destroy_l2_table(priv);
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_arfs_destroy_tables(priv);
+ mlx5e_ethtool_cleanup_steering(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
new file mode 100644
index 000000000..41cde926c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -0,0 +1,844 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+
+struct mlx5e_ethtool_rule {
+ struct list_head list;
+ struct ethtool_rx_flow_spec flow_spec;
+ struct mlx5_flow_handle *rule;
+ struct mlx5e_ethtool_table *eth_ft;
+};
+
+static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
+{
+ if (!--eth_ft->num_rules) {
+ mlx5_destroy_flow_table(eth_ft->ft);
+ eth_ft->ft = NULL;
+ }
+}
+
+#define MLX5E_ETHTOOL_L3_L4_PRIO 0
+#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS)
+#define MLX5E_ETHTOOL_NUM_ENTRIES 64000
+#define MLX5E_ETHTOOL_NUM_GROUPS 10
+static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs,
+ int num_tuples)
+{
+ struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ int max_tuples;
+ int table_size;
+ int prio;
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+ prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+ eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ break;
+ case IP_USER_FLOW:
+ case IPV6_USER_FLOW:
+ max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+ prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+ eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ break;
+ case ETHER_FLOW:
+ max_tuples = ETHTOOL_NUM_L2_FTS;
+ prio = max_tuples - num_tuples;
+ eth_ft = &priv->fs.ethtool.l2_ft[prio];
+ prio += MLX5E_ETHTOOL_L2_PRIO;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ eth_ft->num_rules++;
+ if (eth_ft->ft)
+ return eth_ft;
+
+ ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_ETHTOOL);
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev,
+ flow_table_properties_nic_receive.log_max_ft_size)),
+ MLX5E_ETHTOOL_NUM_ENTRIES);
+ ft = mlx5_create_auto_grouped_flow_table(ns, prio,
+ table_size,
+ MLX5E_ETHTOOL_NUM_GROUPS, 0, 0);
+ if (IS_ERR(ft))
+ return (void *)ft;
+
+ eth_ft->ft = ft;
+ return eth_ft;
+}
+
+static void mask_spec(u8 *mask, u8 *val, size_t size)
+{
+ unsigned int i;
+
+ for (i = 0; i < size; i++, mask++, val++)
+ *((u8 *)val) = *((u8 *)mask) & *((u8 *)val);
+}
+
+#define MLX5E_FTE_SET(header_p, fld, v) \
+ MLX5_SET(fte_match_set_lyr_2_4, header_p, fld, v)
+
+#define MLX5E_FTE_ADDR_OF(header_p, fld) \
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, header_p, fld)
+
+static void
+set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m,
+ __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v)
+{
+ if (ip4src_m) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ip4src_v, sizeof(ip4src_v));
+ memset(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ 0xff, sizeof(ip4src_m));
+ }
+ if (ip4dst_m) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ip4dst_v, sizeof(ip4dst_v));
+ memset(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ 0xff, sizeof(ip4dst_m));
+ }
+
+ MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
+ MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IP);
+}
+
+static void
+set_ip6(void *headers_c, void *headers_v, __be32 ip6src_m[4],
+ __be32 ip6src_v[4], __be32 ip6dst_m[4], __be32 ip6dst_v[4])
+{
+ u8 ip6_sz = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+
+ if (!ipv6_addr_any((struct in6_addr *)ip6src_m)) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ ip6src_v, ip6_sz);
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ ip6src_m, ip6_sz);
+ }
+ if (!ipv6_addr_any((struct in6_addr *)ip6dst_m)) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ ip6dst_v, ip6_sz);
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ ip6dst_m, ip6_sz);
+ }
+
+ MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
+ MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IPV6);
+}
+
+static void
+set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
+ __be16 pdst_m, __be16 pdst_v)
+{
+ if (psrc_m) {
+ MLX5E_FTE_SET(headers_c, tcp_sport, 0xffff);
+ MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v));
+ }
+ if (pdst_m) {
+ MLX5E_FTE_SET(headers_c, tcp_dport, 0xffff);
+ MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v));
+ }
+
+ MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff);
+ MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_TCP);
+}
+
+static void
+set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
+ __be16 pdst_m, __be16 pdst_v)
+{
+ if (psrc_m) {
+ MLX5E_FTE_SET(headers_c, udp_sport, 0xffff);
+ MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v));
+ }
+
+ if (pdst_m) {
+ MLX5E_FTE_SET(headers_c, udp_dport, 0xffff);
+ MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v));
+ }
+
+ MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff);
+ MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_UDP);
+}
+
+static void
+parse_tcp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec;
+ struct ethtool_tcpip4_spec *l4_val = &fs->h_u.tcp_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src,
+ l4_mask->ip4dst, l4_val->ip4dst);
+
+ set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_udp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.udp_ip4_spec;
+ struct ethtool_tcpip4_spec *l4_val = &fs->h_u.udp_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src,
+ l4_mask->ip4dst, l4_val->ip4dst);
+
+ set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_ip4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec;
+ struct ethtool_usrip4_spec *l3_val = &fs->h_u.usr_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l3_mask->ip4src, l3_val->ip4src,
+ l3_mask->ip4dst, l3_val->ip4dst);
+
+ if (l3_mask->proto) {
+ MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->proto);
+ MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->proto);
+ }
+}
+
+static void
+parse_ip6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec;
+ struct ethtool_usrip6_spec *l3_val = &fs->h_u.usr_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l3_mask->ip6src,
+ l3_val->ip6src, l3_mask->ip6dst, l3_val->ip6dst);
+
+ if (l3_mask->l4_proto) {
+ MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->l4_proto);
+ MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->l4_proto);
+ }
+}
+
+static void
+parse_tcp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec;
+ struct ethtool_tcpip6_spec *l4_val = &fs->h_u.tcp_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l4_mask->ip6src,
+ l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst);
+
+ set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_udp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.udp_ip6_spec;
+ struct ethtool_tcpip6_spec *l4_val = &fs->h_u.udp_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l4_mask->ip6src,
+ l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst);
+
+ set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_ether(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethhdr *eth_mask = &fs->m_u.ether_spec;
+ struct ethhdr *eth_val = &fs->h_u.ether_spec;
+
+ mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask));
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, smac_47_16), eth_mask->h_source);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, smac_47_16), eth_val->h_source);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), eth_mask->h_dest);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), eth_val->h_dest);
+ MLX5E_FTE_SET(headers_c, ethertype, ntohs(eth_mask->h_proto));
+ MLX5E_FTE_SET(headers_v, ethertype, ntohs(eth_val->h_proto));
+}
+
+static void
+set_cvlan(void *headers_c, void *headers_v, __be16 vlan_tci)
+{
+ MLX5E_FTE_SET(headers_c, cvlan_tag, 1);
+ MLX5E_FTE_SET(headers_v, cvlan_tag, 1);
+ MLX5E_FTE_SET(headers_c, first_vid, 0xfff);
+ MLX5E_FTE_SET(headers_v, first_vid, ntohs(vlan_tci));
+}
+
+static void
+set_dmac(void *headers_c, void *headers_v,
+ unsigned char m_dest[ETH_ALEN], unsigned char v_dest[ETH_ALEN])
+{
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), m_dest);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), v_dest);
+}
+
+static int set_flow_attrs(u32 *match_c, u32 *match_v,
+ struct ethtool_rx_flow_spec *fs)
+{
+ void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ parse_tcp4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case UDP_V4_FLOW:
+ parse_udp4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case IP_USER_FLOW:
+ parse_ip4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case TCP_V6_FLOW:
+ parse_tcp6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case UDP_V6_FLOW:
+ parse_udp6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case IPV6_USER_FLOW:
+ parse_ip6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case ETHER_FLOW:
+ parse_ether(outer_headers_c, outer_headers_v, fs);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((fs->flow_type & FLOW_EXT) &&
+ (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)))
+ set_cvlan(outer_headers_c, outer_headers_v, fs->h_ext.vlan_tci);
+
+ if (fs->flow_type & FLOW_MAC_EXT &&
+ !is_zero_ether_addr(fs->m_ext.h_dest)) {
+ mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN);
+ set_dmac(outer_headers_c, outer_headers_v, fs->m_ext.h_dest,
+ fs->h_ext.h_dest);
+ }
+
+ return 0;
+}
+
+static void add_rule_to_list(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *rule)
+{
+ struct mlx5e_ethtool_rule *iter;
+ struct list_head *head = &priv->fs.ethtool.rules;
+
+ list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ if (iter->flow_spec.location > rule->flow_spec.location)
+ break;
+ head = &iter->list;
+ }
+ priv->fs.ethtool.tot_num_rules++;
+ list_add(&rule->list, head);
+}
+
+static bool outer_header_zero(u32 *match_criteria)
+{
+ int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
+ return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
+ outer_headers_c + 1,
+ size - 1);
+}
+
+static struct mlx5_flow_handle *
+add_ethtool_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_table *ft,
+ struct ethtool_rx_flow_spec *fs)
+{
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_handle *rule;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+ err = set_flow_attrs(spec->match_criteria, spec->match_value,
+ fs);
+ if (err)
+ goto free;
+
+ if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ } else {
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
+ flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
+ __func__, err);
+ goto free;
+ }
+free:
+ kvfree(spec);
+ kfree(dst);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static void del_ethtool_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule)
+{
+ if (eth_rule->rule)
+ mlx5_del_flow_rules(eth_rule->rule);
+ list_del(&eth_rule->list);
+ priv->fs.ethtool.tot_num_rules--;
+ put_flow_table(eth_rule->eth_ft);
+ kfree(eth_rule);
+}
+
+static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv,
+ int location)
+{
+ struct mlx5e_ethtool_rule *iter;
+
+ list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ if (iter->flow_spec.location == location)
+ return iter;
+ }
+ return NULL;
+}
+
+static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv,
+ int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+
+ eth_rule = find_ethtool_rule(priv, location);
+ if (eth_rule)
+ del_ethtool_rule(priv, eth_rule);
+
+ eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
+ if (!eth_rule)
+ return ERR_PTR(-ENOMEM);
+
+ add_rule_to_list(priv, eth_rule);
+ return eth_rule;
+}
+
+#define MAX_NUM_OF_ETHTOOL_RULES BIT(10)
+
+#define all_ones(field) (field == (__force typeof(field))-1)
+#define all_zeros_or_all_ones(field) \
+ ((field) == 0 || (field) == (__force typeof(field))-1)
+
+static int validate_ethter(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethhdr *eth_mask = &fs->m_u.ether_spec;
+ int ntuples = 0;
+
+ if (!is_zero_ether_addr(eth_mask->h_dest))
+ ntuples++;
+ if (!is_zero_ether_addr(eth_mask->h_source))
+ ntuples++;
+ if (eth_mask->h_proto)
+ ntuples++;
+ return ntuples;
+}
+
+static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec;
+ int ntuples = 0;
+
+ if (l4_mask->tos)
+ return -EINVAL;
+
+ if (l4_mask->ip4src) {
+ if (!all_ones(l4_mask->ip4src))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->ip4dst) {
+ if (!all_ones(l4_mask->ip4dst))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->psrc) {
+ if (!all_ones(l4_mask->psrc))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->pdst) {
+ if (!all_ones(l4_mask->pdst))
+ return -EINVAL;
+ ntuples++;
+ }
+ /* Flow is TCP/UDP */
+ return ++ntuples;
+}
+
+static int validate_ip4(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec;
+ int ntuples = 0;
+
+ if (l3_mask->l4_4_bytes || l3_mask->tos ||
+ fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4)
+ return -EINVAL;
+ if (l3_mask->ip4src) {
+ if (!all_ones(l3_mask->ip4src))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l3_mask->ip4dst) {
+ if (!all_ones(l3_mask->ip4dst))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l3_mask->proto)
+ ntuples++;
+ /* Flow is IPv4 */
+ return ++ntuples;
+}
+
+static int validate_ip6(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec;
+ int ntuples = 0;
+
+ if (l3_mask->l4_4_bytes || l3_mask->tclass)
+ return -EINVAL;
+ if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6src))
+ ntuples++;
+
+ if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6dst))
+ ntuples++;
+ if (l3_mask->l4_proto)
+ ntuples++;
+ /* Flow is IPv6 */
+ return ++ntuples;
+}
+
+static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec;
+ int ntuples = 0;
+
+ if (l4_mask->tclass)
+ return -EINVAL;
+
+ if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6src))
+ ntuples++;
+
+ if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst))
+ ntuples++;
+
+ if (l4_mask->psrc) {
+ if (!all_ones(l4_mask->psrc))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->pdst) {
+ if (!all_ones(l4_mask->pdst))
+ return -EINVAL;
+ ntuples++;
+ }
+ /* Flow is TCP/UDP */
+ return ++ntuples;
+}
+
+static int validate_vlan(struct ethtool_rx_flow_spec *fs)
+{
+ if (fs->m_ext.vlan_etype ||
+ fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))
+ return -EINVAL;
+
+ if (fs->m_ext.vlan_tci &&
+ (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID))
+ return -EINVAL;
+
+ return 1;
+}
+
+static int validate_flow(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs)
+{
+ int num_tuples = 0;
+ int ret = 0;
+
+ if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -ENOSPC;
+
+ if (fs->ring_cookie >= priv->channels.params.num_channels &&
+ fs->ring_cookie != RX_CLS_FLOW_DISC)
+ return -EINVAL;
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case ETHER_FLOW:
+ num_tuples += validate_ethter(fs);
+ break;
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ ret = validate_tcpudp4(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case IP_USER_FLOW:
+ ret = validate_ip4(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ ret = validate_tcpudp6(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case IPV6_USER_FLOW:
+ ret = validate_ip6(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+ if ((fs->flow_type & FLOW_EXT)) {
+ ret = validate_vlan(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ }
+
+ if (fs->flow_type & FLOW_MAC_EXT &&
+ !is_zero_ether_addr(fs->m_ext.h_dest))
+ num_tuples++;
+
+ return num_tuples;
+}
+
+static int
+mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs)
+{
+ struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5e_ethtool_rule *eth_rule;
+ struct mlx5_flow_handle *rule;
+ int num_tuples;
+ int err;
+
+ num_tuples = validate_flow(priv, fs);
+ if (num_tuples <= 0) {
+ netdev_warn(priv->netdev, "%s: flow is not valid %d\n",
+ __func__, num_tuples);
+ return num_tuples;
+ }
+
+ eth_ft = get_flow_table(priv, fs, num_tuples);
+ if (IS_ERR(eth_ft))
+ return PTR_ERR(eth_ft);
+
+ eth_rule = get_ethtool_rule(priv, fs->location);
+ if (IS_ERR(eth_rule)) {
+ put_flow_table(eth_ft);
+ return PTR_ERR(eth_rule);
+ }
+
+ eth_rule->flow_spec = *fs;
+ eth_rule->eth_ft = eth_ft;
+ if (!eth_ft->ft) {
+ err = -EINVAL;
+ goto del_ethtool_rule;
+ }
+ rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto del_ethtool_rule;
+ }
+
+ eth_rule->rule = rule;
+
+ return 0;
+
+del_ethtool_rule:
+ del_ethtool_rule(priv, eth_rule);
+
+ return err;
+}
+
+static int
+mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+ int err = 0;
+
+ if (location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -ENOSPC;
+
+ eth_rule = find_ethtool_rule(priv, location);
+ if (!eth_rule) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ del_ethtool_rule(priv, eth_rule);
+out:
+ return err;
+}
+
+static int
+mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+
+ if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -EINVAL;
+
+ list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
+ if (eth_rule->flow_spec.location == location) {
+ info->fs = eth_rule->flow_spec;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int
+mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ int location = 0;
+ int idx = 0;
+ int err = 0;
+
+ info->data = MAX_NUM_OF_ETHTOOL_RULES;
+ while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
+ err = mlx5e_ethtool_get_flow(priv, info, location);
+ if (!err)
+ rule_locs[idx++] = location;
+ location++;
+ }
+ return err;
+}
+
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ethtool_rule *iter;
+ struct mlx5e_ethtool_rule *temp;
+
+ list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list)
+ del_ethtool_rule(priv, iter);
+}
+
+void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
+{
+ INIT_LIST_HEAD(&priv->fs.ethtool.rules);
+}
+
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ int err = 0;
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ err = mlx5e_ethtool_flow_replace(priv, &cmd->fs);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+int mlx5e_get_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXRINGS:
+ info->data = priv->channels.params.num_channels;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ info->rule_cnt = priv->fs.ethtool.tot_num_rules;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
new file mode 100644
index 000000000..6ecb92f55
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -0,0 +1,5221 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_gact.h>
+#include <net/pkt_cls.h>
+#include <linux/mlx5/fs.h>
+#include <net/vxlan.h>
+#include <linux/bpf.h>
+#include <net/page_pool.h>
+#include "eswitch.h"
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls.h"
+#include "accel/ipsec.h"
+#include "accel/tls.h"
+#include "lib/vxlan.h"
+#include "lib/clock.h"
+#include "en/port.h"
+#include "en/xdp.h"
+
+struct mlx5e_rq_param {
+ u32 rqc[MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+ struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+ u32 sqc[MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param sq;
+ struct mlx5e_sq_param xdp_sq;
+ struct mlx5e_sq_param icosq;
+ struct mlx5e_cq_param rx_cq;
+ struct mlx5e_cq_param tx_cq;
+ struct mlx5e_cq_param icosq_cq;
+};
+
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+{
+ bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
+ MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+ MLX5_CAP_ETH(mdev, reg_umr_sq);
+ u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
+ bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
+
+ if (!striding_rq_umr)
+ return false;
+ if (!inline_umr) {
+ mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
+ (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
+ return false;
+ }
+ return true;
+}
+
+static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
+{
+ u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ u16 linear_rq_headroom = params->xdp_prog ?
+ XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+ u32 frag_sz;
+
+ linear_rq_headroom += NET_IP_ALIGN;
+
+ frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
+
+ if (params->xdp_prog && frag_sz < PAGE_SIZE)
+ frag_sz = PAGE_SIZE;
+
+ return frag_sz;
+}
+
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+{
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+
+ return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+}
+
+static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+
+ return !params->lro_en && frag_sz <= PAGE_SIZE;
+}
+
+#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
+ MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
+static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ s8 signed_log_num_strides_param;
+ u8 log_num_strides;
+
+ if (!mlx5e_rx_is_linear_skb(mdev, params))
+ return false;
+
+ if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ return false;
+
+ if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+ return true;
+
+ log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+ signed_log_num_strides_param =
+ (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+
+ return signed_log_num_strides_param >= 0;
+}
+
+static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+{
+ if (params->log_rq_mtu_frames <
+ mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
+ return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+
+ return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params);
+}
+
+static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+ return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
+
+ return MLX5E_MPWQE_STRIDE_SZ(mdev,
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return MLX5_MPWRQ_LOG_WQE_SZ -
+ mlx5e_mpwqe_get_log_stride_size(mdev, params);
+}
+
+static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u16 linear_rq_headroom = params->xdp_prog ?
+ XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+ bool is_linear_skb;
+
+ linear_rq_headroom += NET_IP_ALIGN;
+
+ is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
+ mlx5e_rx_is_linear_skb(mdev, params) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
+
+ return is_linear_skb ? linear_rq_headroom : 0;
+}
+
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+ BIT(mlx5e_mpwqe_get_log_rq_size(params)) :
+ BIT(params->log_rq_mtu_frames),
+ BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
+ !MLX5_IPSEC_DEV(mdev) &&
+ !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params));
+}
+
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+ MLX5_WQ_TYPE_CYCLIC;
+}
+
+static void mlx5e_update_carrier(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 port_state;
+
+ port_state = mlx5_query_vport_state(mdev,
+ MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
+ 0);
+
+ if (port_state == VPORT_STATE_UP) {
+ netdev_info(priv->netdev, "Link up\n");
+ netif_carrier_on(priv->netdev);
+ } else {
+ netdev_info(priv->netdev, "Link down\n");
+ netif_carrier_off(priv->netdev);
+ }
+}
+
+static void mlx5e_update_carrier_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_carrier_work);
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ if (priv->profile->update_carrier)
+ priv->profile->update_carrier(priv);
+ mutex_unlock(&priv->state_lock);
+}
+
+void mlx5e_update_stats(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = mlx5e_num_stats_grps - 1; i >= 0; i--)
+ if (mlx5e_stats_grps[i].update_stats)
+ mlx5e_stats_grps[i].update_stats(priv);
+}
+
+static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = mlx5e_num_stats_grps - 1; i >= 0; i--)
+ if (mlx5e_stats_grps[i].update_stats_mask &
+ MLX5E_NDO_UPDATE_STATS)
+ mlx5e_stats_grps[i].update_stats(priv);
+}
+
+void mlx5e_update_stats_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv,
+ update_stats_work);
+
+ mutex_lock(&priv->state_lock);
+ priv->profile->update_stats(priv);
+ mutex_unlock(&priv->state_lock);
+}
+
+static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
+ enum mlx5_dev_event event, unsigned long param)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
+ return;
+
+ switch (event) {
+ case MLX5_DEV_EVENT_PORT_UP:
+ case MLX5_DEV_EVENT_PORT_DOWN:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+ default:
+ break;
+ }
+}
+
+static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+}
+
+static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+ synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+}
+
+static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_icosq *sq,
+ struct mlx5e_umr_wqe *wqe)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+ u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ ds_cnt);
+ cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ cseg->imm = rq->mkey_be;
+
+ ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
+ ucseg->xlt_octowords =
+ cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
+ ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+}
+
+static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+ }
+}
+
+static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return rq->mpwqe.wq.cur_sz;
+ default:
+ return rq->wqe.wq.cur_sz;
+ }
+}
+
+static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
+ struct mlx5e_channel *c)
+{
+ int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+
+ rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
+ sizeof(*rq->mpwqe.info)),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->mpwqe.info)
+ return -ENOMEM;
+
+ mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
+
+ return 0;
+}
+
+static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
+ u64 npages, u8 page_shift,
+ struct mlx5_core_mkey *umr_mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+ MLX5_SET64(mkc, mkc, len, npages << page_shift);
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ MLX5_MTT_OCTW(npages));
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+
+ err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
+{
+ u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
+
+ return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+}
+
+static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
+{
+ return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
+}
+
+static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
+{
+ struct mlx5e_wqe_frag_info next_frag = {};
+ struct mlx5e_wqe_frag_info *prev = NULL;
+ int i;
+
+ next_frag.di = &rq->wqe.di[0];
+
+ for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
+ struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+ struct mlx5e_wqe_frag_info *frag =
+ &rq->wqe.frags[i << rq->wqe.info.log_num_frags];
+ int f;
+
+ for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
+ if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
+ next_frag.di++;
+ next_frag.offset = 0;
+ if (prev)
+ prev->last_in_page = true;
+ }
+ *frag = next_frag;
+
+ /* prepare next */
+ next_frag.offset += frag_info[f].frag_stride;
+ prev = frag;
+ }
+ }
+
+ if (prev)
+ prev->last_in_page = true;
+}
+
+static int mlx5e_init_di_list(struct mlx5e_rq *rq,
+ struct mlx5e_params *params,
+ int wq_sz, int cpu)
+{
+ int len = wq_sz << rq->wqe.info.log_num_frags;
+
+ rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!rq->wqe.di)
+ return -ENOMEM;
+
+ mlx5e_init_frags_partition(rq);
+
+ return 0;
+}
+
+static void mlx5e_free_di_list(struct mlx5e_rq *rq)
+{
+ kvfree(rq->wqe.di);
+}
+
+static int mlx5e_alloc_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq *rq)
+{
+ struct page_pool_params pp_params = { 0 };
+ struct mlx5_core_dev *mdev = c->mdev;
+ void *rqc = rqp->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ u32 pool_size;
+ int wq_sz;
+ int err;
+ int i;
+
+ rqp->wq.db_numa_node = cpu_to_node(c->cpu);
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = c->netdev;
+ rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
+ rq->channel = c;
+ rq->ix = c->ix;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &c->priv->channel_stats[c->ix].rq;
+
+ rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
+ if (IS_ERR(rq->xdp_prog)) {
+ err = PTR_ERR(rq->xdp_prog);
+ rq->xdp_prog = NULL;
+ goto err_rq_wq_destroy;
+ }
+
+ err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix);
+ if (err < 0)
+ goto err_rq_wq_destroy;
+
+ rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+ rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+ pool_size = 1 << params->log_rq_mtu_frames;
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_rq_wq_destroy;
+
+ rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
+
+ wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+
+ pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
+
+ rq->post_wqes = mlx5e_post_rx_mpwqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
+
+ rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (MLX5_IPSEC_DEV(mdev)) {
+ err = -EINVAL;
+ netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
+ goto err_rq_wq_destroy;
+ }
+#endif
+ if (!rq->handle_rx_cqe) {
+ err = -EINVAL;
+ netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
+ goto err_rq_wq_destroy;
+ }
+
+ rq->mpwqe.skb_from_cqe_mpwrq =
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ?
+ mlx5e_skb_from_cqe_mpwrq_linear :
+ mlx5e_skb_from_cqe_mpwrq_nonlinear;
+ rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
+ rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
+
+ err = mlx5e_create_rq_umr_mkey(mdev, rq);
+ if (err)
+ goto err_rq_wq_destroy;
+ rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+
+ err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+ if (err)
+ goto err_free;
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_rq_wq_destroy;
+
+ rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
+
+ wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
+
+ rq->wqe.info = rqp->frags_info;
+ rq->wqe.frags =
+ kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
+ (wq_sz << rq->wqe.info.log_num_frags)),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->wqe.frags) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ err = mlx5e_init_di_list(rq, params, wq_sz, c->cpu);
+ if (err)
+ goto err_free;
+ rq->post_wqes = mlx5e_post_rx_wqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (c->priv->ipsec)
+ rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
+ else
+#endif
+ rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
+ if (!rq->handle_rx_cqe) {
+ err = -EINVAL;
+ netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err);
+ goto err_free;
+ }
+
+ rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(mdev, params) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
+ rq->mkey_be = c->mkey_be;
+ }
+
+ /* Create a page_pool and register it with rxq */
+ pp_params.order = 0;
+ pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.pool_size = pool_size;
+ pp_params.nid = cpu_to_node(c->cpu);
+ pp_params.dev = c->pdev;
+ pp_params.dma_dir = rq->buff.map_dir;
+
+ /* page_pool can be used even when there is no rq->xdp_prog,
+ * given page_pool does not handle DMA mapping there is no
+ * required state to clear. And page_pool gracefully handle
+ * elevated refcnt.
+ */
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ err = PTR_ERR(rq->page_pool);
+ rq->page_pool = NULL;
+ goto err_free;
+ }
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, rq->page_pool);
+ if (err)
+ goto err_free;
+
+ for (i = 0; i < wq_sz; i++) {
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ struct mlx5e_rx_wqe_ll *wqe =
+ mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
+ u32 byte_count =
+ rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
+ u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
+
+ wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
+ wqe->data[0].byte_count = cpu_to_be32(byte_count);
+ wqe->data[0].lkey = rq->mkey_be;
+ } else {
+ struct mlx5e_rx_wqe_cyc *wqe =
+ mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
+ int f;
+
+ for (f = 0; f < rq->wqe.info.num_frags; f++) {
+ u32 frag_size = rq->wqe.info.arr[f].frag_size |
+ MLX5_HW_START_PADDING;
+
+ wqe->data[f].byte_count = cpu_to_be32(frag_size);
+ wqe->data[f].lkey = rq->mkey_be;
+ }
+ /* check if num_frags is not a pow of two */
+ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
+ wqe->data[f].byte_count = 0;
+ wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ wqe->data[f].addr = 0;
+ }
+ }
+ }
+
+ INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work);
+
+ switch (params->rx_cq_moderation.cq_period_mode) {
+ case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
+ rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+ break;
+ case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
+ default:
+ rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ }
+
+ rq->page_cache.head = 0;
+ rq->page_cache.tail = 0;
+
+ return 0;
+
+err_free:
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ kvfree(rq->mpwqe.info);
+ mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ kvfree(rq->wqe.frags);
+ mlx5e_free_di_list(rq);
+ }
+
+err_rq_wq_destroy:
+ if (rq->xdp_prog)
+ bpf_prog_put(rq->xdp_prog);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (rq->page_pool)
+ page_pool_destroy(rq->page_pool);
+ mlx5_wq_destroy(&rq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_rq(struct mlx5e_rq *rq)
+{
+ int i;
+
+ if (rq->xdp_prog)
+ bpf_prog_put(rq->xdp_prog);
+
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (rq->page_pool)
+ page_pool_destroy(rq->page_pool);
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ kvfree(rq->mpwqe.info);
+ mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ kvfree(rq->wqe.frags);
+ mlx5e_free_di_list(rq);
+ }
+
+ for (i = rq->page_cache.head; i != rq->page_cache.tail;
+ i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
+ struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
+
+ mlx5e_page_release(rq, dma_info, false);
+ }
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
+static int mlx5e_create_rq(struct mlx5e_rq *rq,
+ struct mlx5e_rq_param *param)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+
+ void *in;
+ void *rqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * rq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ memcpy(rqc, param->rqc, sizeof(param->rqc));
+
+ MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
+ int next_state)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS);
+ MLX5_SET(rqc, rqc, scatter_fcs, enable);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5_core_dev *mdev = c->mdev;
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+ MLX5_SET(rqc, rqc, vsd, vsd);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
+{
+ mlx5_core_destroy_rq(rq->mdev, rq->rqn);
+}
+
+static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
+{
+ unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
+ struct mlx5e_channel *c = rq->channel;
+
+ u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
+
+ do {
+ if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes)
+ return 0;
+
+ msleep(20);
+ } while (time_before(jiffies, exp_time));
+
+ netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
+ c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+ __be16 wqe_ix_be;
+ u16 wqe_ix;
+
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+
+ /* UMR WQE (if in progress) is always at wq->head */
+ if (rq->mpwqe.umr_in_progress)
+ rq->dealloc_wqe(rq, wq->head);
+
+ while (!mlx5_wq_ll_is_empty(wq)) {
+ struct mlx5e_rx_wqe_ll *wqe;
+
+ wqe_ix_be = *wq->tail_next;
+ wqe_ix = be16_to_cpu(wqe_ix_be);
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix);
+ rq->dealloc_wqe(rq, wqe_ix);
+ mlx5_wq_ll_pop(wq, wqe_ix_be,
+ &wqe->next.next_wqe_index);
+ }
+ } else {
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+
+ while (!mlx5_wq_cyc_is_empty(wq)) {
+ wqe_ix = mlx5_wq_cyc_get_tail(wq);
+ rq->dealloc_wqe(rq, wqe_ix);
+ mlx5_wq_cyc_pop(wq);
+ }
+ }
+
+}
+
+static int mlx5e_open_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *param,
+ struct mlx5e_rq *rq)
+{
+ int err;
+
+ err = mlx5e_alloc_rq(c, params, param, rq);
+ if (err)
+ return err;
+
+ err = mlx5e_create_rq(rq, param);
+ if (err)
+ goto err_free_rq;
+
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ goto err_destroy_rq;
+
+ if (params->rx_dim_enabled)
+ __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+
+ /* We disable csum_complete when XDP is enabled since
+ * XDP programs might manipulate packets which will render
+ * skb->checksum incorrect.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+
+ return 0;
+
+err_destroy_rq:
+ mlx5e_destroy_rq(rq);
+err_free_rq:
+ mlx5e_free_rq(rq);
+
+ return err;
+}
+
+static void mlx5e_activate_rq(struct mlx5e_rq *rq)
+{
+ struct mlx5e_icosq *sq = &rq->channel->icosq;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_wqe *nopwqe;
+
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+ sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+ nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
+}
+
+static void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+{
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+ napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
+}
+
+static void mlx5e_close_rq(struct mlx5e_rq *rq)
+{
+ cancel_work_sync(&rq->dim.work);
+ mlx5e_destroy_rq(rq);
+ mlx5e_free_rx_descs(rq);
+ mlx5e_free_rq(rq);
+}
+
+static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
+{
+ kvfree(sq->db.xdpi);
+}
+
+static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+ sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)),
+ GFP_KERNEL, numa);
+ if (!sq->db.xdpi) {
+ mlx5e_free_xdpsq_db(sq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_xdpsq *sq,
+ bool is_redirect)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->pdev = c->pdev;
+ sq->mkey_be = c->mkey_be;
+ sq->channel = c;
+ sq->uar_map = mdev->mlx5e_res.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->stats = is_redirect ?
+ &c->priv->channel_stats[c->ix].xdpsq :
+ &c->priv->channel_stats[c->ix].rq_xdpsq;
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
+{
+ mlx5e_free_xdpsq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
+{
+ kvfree(sq->db.ico_wqe);
+}
+
+static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
+{
+ u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+ sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
+ sizeof(*sq->db.ico_wqe)),
+ GFP_KERNEL, numa);
+ if (!sq->db.ico_wqe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_icosq *sq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->channel = c;
+ sq->uar_map = mdev->mlx5e_res.bfreg.map;
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
+{
+ mlx5e_free_icosq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
+{
+ kvfree(sq->db.wqe_info);
+ kvfree(sq->db.dma_fifo);
+}
+
+static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+ sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.dma_fifo)),
+ GFP_KERNEL, numa);
+ sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
+ sizeof(*sq->db.wqe_info)),
+ GFP_KERNEL, numa);
+ if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+ mlx5e_free_txqsq_db(sq);
+ return -ENOMEM;
+ }
+
+ sq->dma_fifo_mask = df_sz - 1;
+
+ return 0;
+}
+
+static void mlx5e_sq_recover(struct work_struct *work);
+static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
+ int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq,
+ int tc)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->pdev = c->pdev;
+ sq->tstamp = c->tstamp;
+ sq->clock = &mdev->clock;
+ sq->mkey_be = c->mkey_be;
+ sq->channel = c;
+ sq->txq_ix = txq_ix;
+ sq->uar_map = mdev->mlx5e_res.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
+ INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
+ if (MLX5_IPSEC_DEV(c->priv->mdev))
+ set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+ if (mlx5_accel_is_tls_device(c->priv->mdev))
+ set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
+ sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
+{
+ mlx5e_free_txqsq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+struct mlx5e_create_sq_param {
+ struct mlx5_wq_ctrl *wq_ctrl;
+ u32 cqn;
+ u32 tisn;
+ u8 tis_lst_sz;
+ u8 min_inline_mode;
+};
+
+static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u32 *sqn)
+{
+ void *in;
+ void *sqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * csp->wq_ctrl->buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, param->sqc, sizeof(param->sqc));
+ MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz);
+ MLX5_SET(sqc, sqc, tis_num_0, csp->tisn);
+ MLX5_SET(sqc, sqc, cqn, csp->cqn);
+
+ if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+ MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode);
+
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma);
+
+ mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, sqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+struct mlx5e_modify_sq_param {
+ int curr_state;
+ int next_state;
+ bool rl_update;
+ int rl_index;
+};
+
+static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
+ MLX5_SET(sqc, sqc, state, p->next_state);
+ if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
+ }
+
+ err = mlx5_core_modify_sq(mdev, sqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ mlx5_core_destroy_sq(mdev, sqn);
+}
+
+static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u32 *sqn)
+{
+ struct mlx5e_modify_sq_param msp = {0};
+ int err;
+
+ err = mlx5e_create_sq(mdev, param, csp, sqn);
+ if (err)
+ return err;
+
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+ err = mlx5e_modify_sq(mdev, *sqn, &msp);
+ if (err)
+ mlx5e_destroy_sq(mdev, *sqn);
+
+ return err;
+}
+
+static int mlx5e_set_sq_maxrate(struct net_device *dev,
+ struct mlx5e_txqsq *sq, u32 rate);
+
+static int mlx5e_open_txqsq(struct mlx5e_channel *c,
+ u32 tisn,
+ int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq,
+ int tc)
+{
+ struct mlx5e_create_sq_param csp = {};
+ u32 tx_rate;
+ int err;
+
+ err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
+ if (err)
+ return err;
+
+ csp.tisn = tisn;
+ csp.tis_lst_sz = 1;
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = sq->min_inline_mode;
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ if (err)
+ goto err_free_txqsq;
+
+ tx_rate = c->priv->tx_rates[sq->txq_ix];
+ if (tx_rate)
+ mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
+
+ if (params->tx_dim_enabled)
+ sq->state |= BIT(MLX5E_SQ_STATE_AM);
+
+ return 0;
+
+err_free_txqsq:
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ mlx5e_free_txqsq(sq);
+
+ return err;
+}
+
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+ WARN_ONCE(sq->cc != sq->pc,
+ "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+ sq->cc = 0;
+ sq->dma_fifo_cc = 0;
+ sq->pc = 0;
+}
+
+static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+{
+ sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ netdev_tx_reset_queue(sq->txq);
+ netif_tx_start_queue(sq->txq);
+}
+
+static inline void netif_tx_disable_queue(struct netdev_queue *txq)
+{
+ __netif_tx_lock_bh(txq);
+ netif_tx_stop_queue(txq);
+ __netif_tx_unlock_bh(txq);
+}
+
+static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ /* prevent netif_tx_wake_queue */
+ napi_synchronize(&c->napi);
+
+ netif_tx_disable_queue(sq->txq);
+
+ /* last doorbell out, godspeed .. */
+ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_tx_wqe *nop;
+
+ sq->db.wqe_info[pi].skb = NULL;
+ nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
+ }
+}
+
+static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_rate_limit rl = {0};
+
+ cancel_work_sync(&sq->dim.work);
+ mlx5e_destroy_sq(mdev, sq->sqn);
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
+ mlx5e_free_txqsq_descs(sq);
+ mlx5e_free_txqsq(sq);
+}
+
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+ unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+ while (time_before(jiffies, exp_time)) {
+ if (sq->cc == sq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(sq->channel->netdev,
+ "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
+{
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ struct mlx5e_modify_sq_param msp = {0};
+ int err;
+
+ msp.curr_state = curr_state;
+ msp.next_state = MLX5_SQC_STATE_RST;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
+ return err;
+ }
+
+ memset(&msp, 0, sizeof(msp));
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5e_sq_recover(struct work_struct *work)
+{
+ struct mlx5e_txqsq_recover *recover =
+ container_of(work, struct mlx5e_txqsq_recover,
+ recover_work);
+ struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
+ recover);
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ u8 state;
+ int err;
+
+ err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+ sq->sqn, err);
+ return;
+ }
+
+ if (state != MLX5_RQC_STATE_ERR) {
+ netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
+ return;
+ }
+
+ netif_tx_disable_queue(sq->txq);
+
+ if (mlx5e_wait_for_sq_flush(sq))
+ return;
+
+ /* If the interval between two consecutive recovers per SQ is too
+ * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
+ * If we reached this state, there is probably a bug that needs to be
+ * fixed. let's keep the queue close and let tx timeout cleanup.
+ */
+ if (jiffies_to_msecs(jiffies - recover->last_recover) <
+ MLX5E_SQ_RECOVER_MIN_INTERVAL) {
+ netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
+ sq->sqn);
+ return;
+ }
+
+ /* At this point, no new packets will arrive from the stack as TXQ is
+ * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+ * pending WQEs. SQ can safely reset the SQ.
+ */
+ if (mlx5e_sq_to_ready(sq, state))
+ return;
+
+ mlx5e_reset_txqsq_cc_pc(sq);
+ sq->stats->recover++;
+ recover->last_recover = jiffies;
+ mlx5e_activate_txqsq(sq);
+}
+
+static int mlx5e_open_icosq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_icosq *sq)
+{
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+ err = mlx5e_alloc_icosq(c, param, sq);
+ if (err)
+ return err;
+
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = params->tx_min_inline_mode;
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ if (err)
+ goto err_free_icosq;
+
+ return 0;
+
+err_free_icosq:
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ mlx5e_free_icosq(sq);
+
+ return err;
+}
+
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
+ mlx5e_free_icosq(sq);
+}
+
+static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_xdpsq *sq,
+ bool is_redirect)
+{
+ unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+ struct mlx5e_create_sq_param csp = {};
+ unsigned int inline_hdr_sz = 0;
+ int err;
+ int i;
+
+ err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
+ if (err)
+ return err;
+
+ csp.tis_lst_sz = 1;
+ csp.tisn = c->priv->tisn[0]; /* tc = 0 */
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = sq->min_inline_mode;
+ if (is_redirect)
+ set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ if (err)
+ goto err_free_xdpsq;
+
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ ds_cnt++;
+ }
+
+ /* Pre initialize fixed WQE fields */
+ for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg;
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+
+ dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+ dseg->lkey = sq->mkey_be;
+ }
+
+ return 0;
+
+err_free_xdpsq:
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ mlx5e_free_xdpsq(sq);
+
+ return err;
+}
+
+static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
+ mlx5e_free_xdpsq_descs(sq);
+ mlx5e_free_xdpsq(sq);
+}
+
+static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ int eqn_not_used;
+ unsigned int irqn;
+ int err;
+ u32 i;
+
+ err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
+ if (err)
+ return err;
+
+ err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
+ &cq->wq_ctrl);
+ if (err)
+ return err;
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+ *mcq->set_ci_db = 0;
+ *mcq->arm_db = 0;
+ mcq->vector = param->eq_ix;
+ mcq->comp = mlx5e_completion_event;
+ mcq->event = mlx5e_cq_error_event;
+ mcq->irqn = irqn;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->mdev = mdev;
+
+ return 0;
+}
+
+static int mlx5e_alloc_cq(struct mlx5e_channel *c,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_dev *mdev = c->priv->mdev;
+ int err;
+
+ param->wq.buf_numa_node = cpu_to_node(c->cpu);
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ param->eq_ix = c->ix;
+
+ err = mlx5e_alloc_cq_common(mdev, param, cq);
+
+ cq->napi = &c->napi;
+ cq->channel = c;
+
+ return err;
+}
+
+static void mlx5e_free_cq(struct mlx5e_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
+{
+ struct mlx5_core_dev *mdev = cq->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+
+ void *in;
+ void *cqc;
+ int inlen;
+ unsigned int irqn_not_used;
+ int eqn;
+ int err;
+
+ err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, param->cqc, sizeof(param->cqc));
+
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen);
+
+ kvfree(in);
+
+ if (err)
+ return err;
+
+ mlx5e_cq_arm(cq);
+
+ return 0;
+}
+
+static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
+{
+ mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+}
+
+static int mlx5e_open_cq(struct mlx5e_channel *c,
+ struct net_dim_cq_moder moder,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ int err;
+
+ err = mlx5e_alloc_cq(c, param, cq);
+ if (err)
+ return err;
+
+ err = mlx5e_create_cq(cq, param);
+ if (err)
+ goto err_free_cq;
+
+ if (MLX5_CAP_GEN(mdev, cq_moderation))
+ mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
+ return 0;
+
+err_free_cq:
+ mlx5e_free_cq(cq);
+
+ return err;
+}
+
+static void mlx5e_close_cq(struct mlx5e_cq *cq)
+{
+ mlx5e_destroy_cq(cq);
+ mlx5e_free_cq(cq);
+}
+
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+ return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask);
+}
+
+static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ err = mlx5e_open_cq(c, params->tx_cq_moderation,
+ &cparam->tx_cq, &c->sq[tc].cq);
+ if (err)
+ goto err_close_tx_cqs;
+ }
+
+ return 0;
+
+err_close_tx_cqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_cq(&c->sq[tc].cq);
+
+ return err;
+}
+
+static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->sq[tc].cq);
+}
+
+static int mlx5e_open_sqs(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ struct mlx5e_priv *priv = c->priv;
+ int err, tc, max_nch = priv->profile->max_nch(priv->mdev);
+
+ for (tc = 0; tc < params->num_tc; tc++) {
+ int txq_ix = c->ix + tc * max_nch;
+
+ err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix,
+ params, &cparam->sq, &c->sq[tc], tc);
+ if (err)
+ goto err_close_sqs;
+ }
+
+ return 0;
+
+err_close_sqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_txqsq(&c->sq[tc]);
+
+ return err;
+}
+
+static void mlx5e_close_sqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_txqsq(&c->sq[tc]);
+}
+
+static int mlx5e_set_sq_maxrate(struct net_device *dev,
+ struct mlx5e_txqsq *sq, u32 rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_modify_sq_param msp = {0};
+ struct mlx5_rate_limit rl = {0};
+ u16 rl_index = 0;
+ int err;
+
+ if (rate == sq->rate_limit)
+ /* nothing to do */
+ return 0;
+
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
+ /* remove current rl index to free space to next ones */
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
+
+ sq->rate_limit = 0;
+
+ if (rate) {
+ rl.rate = rate;
+ err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
+ if (err) {
+ netdev_err(dev, "Failed configuring rate %u: %d\n",
+ rate, err);
+ return err;
+ }
+ }
+
+ msp.curr_state = MLX5_SQC_STATE_RDY;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+ msp.rl_index = rl_index;
+ msp.rl_update = true;
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed configuring rate %u: %d\n",
+ rate, err);
+ /* remove the rate from the table */
+ if (rate)
+ mlx5_rl_remove_rate(mdev, &rl);
+ return err;
+ }
+
+ sq->rate_limit = rate;
+ return 0;
+}
+
+static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_txqsq *sq = priv->txq2sq[index];
+ int err = 0;
+
+ if (!mlx5_rl_is_supported(mdev)) {
+ netdev_err(dev, "Rate limiting is not supported on this device\n");
+ return -EINVAL;
+ }
+
+ /* rate is given in Mb/sec, HW config is in Kb/sec */
+ rate = rate << 10;
+
+ /* Check whether rate in valid range, 0 is always valid */
+ if (rate && !mlx5_rl_is_in_range(mdev, rate)) {
+ netdev_err(dev, "TX rate %u, is not in range\n", rate);
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ err = mlx5e_set_sq_maxrate(dev, sq, rate);
+ if (!err)
+ priv->tx_rates[index] = rate;
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam,
+ struct mlx5e_channel **cp)
+{
+ struct net_dim_cq_moder icocq_moder = {0, 0};
+ struct net_device *netdev = priv->netdev;
+ int cpu = mlx5e_get_cpu(priv, ix);
+ struct mlx5e_channel *c;
+ unsigned int irq;
+ int err;
+ int eqn;
+
+ err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+ if (err)
+ return err;
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
+ if (!c)
+ return -ENOMEM;
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->ix = ix;
+ c->cpu = cpu;
+ c->pdev = &priv->mdev->pdev->dev;
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+ c->num_tc = params->num_tc;
+ c->xdp = !!params->xdp_prog;
+ c->stats = &priv->channel_stats[ix].ch;
+
+ c->irq_desc = irq_to_desc(irq);
+
+ netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
+
+ err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
+ if (err)
+ goto err_napi_del;
+
+ err = mlx5e_open_tx_cqs(c, params, cparam);
+ if (err)
+ goto err_close_icosq_cq;
+
+ err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq);
+ if (err)
+ goto err_close_tx_cqs;
+
+ err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
+ if (err)
+ goto err_close_xdp_tx_cqs;
+
+ /* XDP SQ CQ params are same as normal TXQ sq CQ params */
+ err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
+ &cparam->tx_cq, &c->rq.xdpsq.cq) : 0;
+ if (err)
+ goto err_close_rx_cq;
+
+ napi_enable(&c->napi);
+
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+ if (err)
+ goto err_disable_napi;
+
+ err = mlx5e_open_sqs(c, params, cparam);
+ if (err)
+ goto err_close_icosq;
+
+ err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0;
+ if (err)
+ goto err_close_sqs;
+
+ err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq);
+ if (err)
+ goto err_close_xdp_sq;
+
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true);
+ if (err)
+ goto err_close_rq;
+
+ *cp = c;
+
+ return 0;
+
+err_close_rq:
+ mlx5e_close_rq(&c->rq);
+
+err_close_xdp_sq:
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq.xdpsq);
+
+err_close_sqs:
+ mlx5e_close_sqs(c);
+
+err_close_icosq:
+ mlx5e_close_icosq(&c->icosq);
+
+err_disable_napi:
+ napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq.xdpsq.cq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->rq.cq);
+
+err_close_xdp_tx_cqs:
+ mlx5e_close_cq(&c->xdpsq.cq);
+
+err_close_tx_cqs:
+ mlx5e_close_tx_cqs(c);
+
+err_close_icosq_cq:
+ mlx5e_close_cq(&c->icosq.cq);
+
+err_napi_del:
+ netif_napi_del(&c->napi);
+ kvfree(c);
+
+ return err;
+}
+
+static void mlx5e_activate_channel(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_activate_txqsq(&c->sq[tc]);
+ mlx5e_activate_rq(&c->rq);
+ netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
+}
+
+static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
+{
+ int tc;
+
+ mlx5e_deactivate_rq(&c->rq);
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_deactivate_txqsq(&c->sq[tc]);
+}
+
+static void mlx5e_close_channel(struct mlx5e_channel *c)
+{
+ mlx5e_close_xdpsq(&c->xdpsq);
+ mlx5e_close_rq(&c->rq);
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_sqs(c);
+ mlx5e_close_icosq(&c->icosq);
+ napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq.xdpsq.cq);
+ mlx5e_close_cq(&c->rq.cq);
+ mlx5e_close_cq(&c->xdpsq.cq);
+ mlx5e_close_tx_cqs(c);
+ mlx5e_close_cq(&c->icosq.cq);
+ netif_napi_del(&c->napi);
+
+ kvfree(c);
+}
+
+#define DEFAULT_FRAG_SIZE (2048)
+
+static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_frags_info *info)
+{
+ u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ int frag_size_max = DEFAULT_FRAG_SIZE;
+ u32 buf_size = 0;
+ int i;
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (MLX5_IPSEC_DEV(mdev))
+ byte_count += MLX5E_METADATA_ETHER_LEN;
+#endif
+
+ if (mlx5e_rx_is_linear_skb(mdev, params)) {
+ int frag_stride;
+
+ frag_stride = mlx5e_rx_get_linear_frag_sz(params);
+ frag_stride = roundup_pow_of_two(frag_stride);
+
+ info->arr[0].frag_size = byte_count;
+ info->arr[0].frag_stride = frag_stride;
+ info->num_frags = 1;
+ info->wqe_bulk = PAGE_SIZE / frag_stride;
+ goto out;
+ }
+
+ if (byte_count > PAGE_SIZE +
+ (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
+ frag_size_max = PAGE_SIZE;
+
+ i = 0;
+ while (buf_size < byte_count) {
+ int frag_size = byte_count - buf_size;
+
+ if (i < MLX5E_MAX_RX_FRAGS - 1)
+ frag_size = min(frag_size, frag_size_max);
+
+ info->arr[i].frag_size = frag_size;
+ info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+
+ buf_size += frag_size;
+ i++;
+ }
+ info->num_frags = i;
+ /* number of different wqes sharing a page */
+ info->wqe_bulk = 1 + (info->num_frags % 2);
+
+out:
+ info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
+ info->log_num_frags = order_base_2(info->num_frags);
+}
+
+static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
+{
+ int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
+
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ sz += sizeof(struct mlx5e_rx_wqe_ll);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ sz += sizeof(struct mlx5e_rx_wqe_cyc);
+ }
+
+ return order_base_2(sz);
+}
+
+static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *param)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int ndsegs = 1;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ mlx5e_mpwqe_get_log_num_strides(mdev, params) -
+ MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ mlx5e_mpwqe_get_log_stride_size(mdev, params) -
+ MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
+ mlx5e_build_rq_frags_info(mdev, params, &param->frags_info);
+ ndsegs = param->frags_info.num_frags;
+ }
+
+ MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn);
+ MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
+ MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
+ MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
+
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+}
+
+static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_rq_param *param)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
+ MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
+
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+}
+
+static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn);
+
+ param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
+}
+
+static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
+}
+
+static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+}
+
+static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *cqc = param->cqc;
+ u8 log_cq_size;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ log_cq_size = params->log_rq_mtu_frames;
+ }
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
+ MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+ }
+
+ mlx5e_build_common_cq_param(priv, param);
+ param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
+}
+
+static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
+
+ mlx5e_build_common_cq_param(priv, param);
+ param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
+}
+
+static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
+
+ mlx5e_build_common_cq_param(priv, param);
+
+ param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
+}
+
+static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+}
+
+static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ mlx5e_build_rq_param(priv, params, &cparam->rq);
+ mlx5e_build_sq_param(priv, params, &cparam->sq);
+ mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+ mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
+ mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq);
+ mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
+ mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
+}
+
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs)
+{
+ struct mlx5e_channel_param *cparam;
+ int err = -ENOMEM;
+ int i;
+
+ chs->num = chs->params.num_channels;
+
+ chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
+ cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
+ if (!chs->c || !cparam)
+ goto err_free;
+
+ mlx5e_build_channel_param(priv, &chs->params, cparam);
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]);
+ if (err)
+ goto err_close_channels;
+ }
+
+ kvfree(cparam);
+ return 0;
+
+err_close_channels:
+ for (i--; i >= 0; i--)
+ mlx5e_close_channel(chs->c[i]);
+
+err_free:
+ kfree(chs->c);
+ kvfree(cparam);
+ chs->num = 0;
+ return err;
+}
+
+static void mlx5e_activate_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_activate_channel(chs->c[i]);
+}
+
+static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq,
+ err ? 0 : 20000);
+
+ return err ? -ETIMEDOUT : 0;
+}
+
+static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_deactivate_channel(chs->c[i]);
+}
+
+void mlx5e_close_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_close_channel(chs->c[i]);
+
+ kfree(chs->c);
+ chs->num = 0;
+}
+
+static int
+mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqtc;
+ int inlen;
+ int err;
+ u32 *in;
+ int i;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+ for (i = 0; i < sz; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
+
+ err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
+ if (!err)
+ rqt->enabled = true;
+
+ kvfree(in);
+ return err;
+}
+
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
+{
+ rqt->enabled = false;
+ mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
+}
+
+int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rqt *rqt = &priv->indir_rqt;
+ int err;
+
+ err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt);
+ if (err)
+ mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
+ return err;
+}
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rqt *rqt;
+ int err;
+ int ix;
+
+ for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ rqt = &priv->direct_tir[ix].rqt;
+ err = mlx5e_create_rqt(priv, 1 /*size */, rqt);
+ if (err)
+ goto err_destroy_rqts;
+ }
+
+ return 0;
+
+err_destroy_rqts:
+ mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
+ for (ix--; ix >= 0; ix--)
+ mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt);
+
+ return err;
+}
+
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->profile->max_nch(priv->mdev); i++)
+ mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+}
+
+static int mlx5e_rx_hash_fn(int hfunc)
+{
+ return (hfunc == ETH_RSS_HASH_TOP) ?
+ MLX5_RX_HASH_FN_TOEPLITZ :
+ MLX5_RX_HASH_FN_INVERTED_XOR8;
+}
+
+int mlx5e_bits_invert(unsigned long a, int size)
+{
+ int inv = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+ return inv;
+}
+
+static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
+ struct mlx5e_redirect_rqt_param rrp, void *rqtc)
+{
+ int i;
+
+ for (i = 0; i < sz; i++) {
+ u32 rqn;
+
+ if (rrp.is_rss) {
+ int ix = i;
+
+ if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(i, ilog2(sz));
+
+ ix = priv->channels.params.indirection_rqt[ix];
+ rqn = rrp.rss.channels->c[ix]->rq.rqn;
+ } else {
+ rqn = rrp.rqn;
+ }
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
+ }
+}
+
+int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
+ struct mlx5e_redirect_rqt_param rrp)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqtc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
+ mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc);
+ err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_redirect_rqt_param rrp)
+{
+ if (!rrp.is_rss)
+ return rrp.rqn;
+
+ if (ix >= rrp.rss.channels->num)
+ return priv->drop_rq.rqn;
+
+ return rrp.rss.channels->c[ix]->rq.rqn;
+}
+
+static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
+ struct mlx5e_redirect_rqt_param rrp)
+{
+ u32 rqtn;
+ int ix;
+
+ if (priv->indir_rqt.enabled) {
+ /* RSS RQ table */
+ rqtn = priv->indir_rqt.rqtn;
+ mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
+ }
+
+ for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ struct mlx5e_redirect_rqt_param direct_rrp = {
+ .is_rss = false,
+ {
+ .rqn = mlx5e_get_direct_rqn(priv, ix, rrp)
+ },
+ };
+
+ /* Direct RQ Tables */
+ if (!priv->direct_tir[ix].rqt.enabled)
+ continue;
+
+ rqtn = priv->direct_tir[ix].rqt.rqtn;
+ mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
+ }
+}
+
+static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs)
+{
+ struct mlx5e_redirect_rqt_param rrp = {
+ .is_rss = true,
+ {
+ .rss = {
+ .channels = chs,
+ .hfunc = chs->params.rss_hfunc,
+ }
+ },
+ };
+
+ mlx5e_redirect_rqts(priv, rrp);
+}
+
+static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
+{
+ struct mlx5e_redirect_rqt_param drop_rrp = {
+ .is_rss = false,
+ {
+ .rqn = priv->drop_rq.rqn,
+ },
+ };
+
+ mlx5e_redirect_rqts(priv, drop_rrp);
+}
+
+static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
+{
+ if (!params->lro_en)
+ return;
+
+#define ROUGH_MAX_L2_L3_HDR_SZ 256
+
+ MLX5_SET(tirc, tirc, lro_enable_mask,
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+ (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
+}
+
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
+ enum mlx5e_traffic_types tt,
+ void *tirc, bool inner)
+{
+ void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
+ MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+
+#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc));
+ if (params->rss_hfunc == ETH_RSS_HASH_TOP) {
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc,
+ rx_hash_toeplitz_key);
+ size_t len = MLX5_FLD_SZ_BYTES(tirc,
+ rx_hash_toeplitz_key);
+
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, params->toeplitz_hash_key, len);
+ }
+
+ switch (tt) {
+ case MLX5E_TT_IPV4_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV6_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV4_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV6_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+
+ case MLX5E_TT_IPV6:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+ default:
+ WARN_ONCE(true, "%s: bad traffic type!\n", __func__);
+ }
+}
+
+static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *tirc;
+ int inlen;
+ int err;
+ int tt;
+ int ix;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tir_in, in, bitmask.lro, 1);
+ tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in,
+ inlen);
+ if (err)
+ goto free_in;
+ }
+
+ for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
+ in, inlen);
+ if (err)
+ goto free_in;
+ }
+
+free_in:
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
+ enum mlx5e_traffic_types tt,
+ u32 *tirc)
+{
+ MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
+ MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
+
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+}
+
+static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 mtu)
+{
+ u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
+ int err;
+
+ err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
+ if (err)
+ return err;
+
+ /* Update vport context MTU */
+ mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
+ return 0;
+}
+
+static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 *mtu)
+{
+ u16 hw_mtu = 0;
+ int err;
+
+ err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
+ if (err || !hw_mtu) /* fallback to port oper mtu */
+ mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
+
+ *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
+}
+
+static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 mtu;
+ int err;
+
+ err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
+ if (err)
+ return err;
+
+ mlx5e_query_mtu(mdev, params, &mtu);
+ if (mtu != params->sw_mtu)
+ netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
+ __func__, mtu, params->sw_mtu);
+
+ params->sw_mtu = mtu;
+ return 0;
+}
+
+static void mlx5e_netdev_set_tcs(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int nch = priv->channels.params.num_channels;
+ int ntc = priv->channels.params.num_tc;
+ int tc;
+
+ netdev_reset_tc(netdev);
+
+ if (ntc == 1)
+ return;
+
+ netdev_set_num_tc(netdev, ntc);
+
+ /* Map netdev TCs to offset 0
+ * We have our own UP to TXQ mapping for QoS
+ */
+ for (tc = 0; tc < ntc; tc++)
+ netdev_set_tc_queue(netdev, tc, nch, 0);
+}
+
+static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+ int i, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ priv->channel_tc2txq[i][tc] = i + tc * max_nch;
+}
+
+static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+ int i, tc;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ c = priv->channels.c[i];
+ for (tc = 0; tc < c->num_tc; tc++) {
+ sq = &c->sq[tc];
+ priv->txq2sq[sq->txq_ix] = sq;
+ }
+ }
+}
+
+void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
+{
+ int num_txqs = priv->channels.num * priv->channels.params.num_tc;
+ struct net_device *netdev = priv->netdev;
+
+ mlx5e_netdev_set_tcs(netdev);
+ netif_set_real_num_tx_queues(netdev, num_txqs);
+ netif_set_real_num_rx_queues(netdev, priv->channels.num);
+
+ mlx5e_build_tx2sq_maps(priv);
+ mlx5e_activate_channels(&priv->channels);
+ mlx5e_xdp_tx_enable(priv);
+ netif_tx_start_all_queues(priv->netdev);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_add_sqs_fwd_rules(priv);
+
+ mlx5e_wait_channels_min_rx_wqes(&priv->channels);
+ mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
+}
+
+void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_redirect_rqts_to_drop(priv);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_remove_sqs_fwd_rules(priv);
+
+ /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
+ * polling for inactive tx queues.
+ */
+ netif_tx_stop_all_queues(priv->netdev);
+ netif_tx_disable(priv->netdev);
+ mlx5e_xdp_tx_disable(priv);
+ mlx5e_deactivate_channels(&priv->channels);
+}
+
+void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_hw_modify hw_modify)
+{
+ struct net_device *netdev = priv->netdev;
+ int new_num_txqs;
+ int carrier_ok;
+ new_num_txqs = new_chs->num * new_chs->params.num_tc;
+
+ carrier_ok = netif_carrier_ok(netdev);
+ netif_carrier_off(netdev);
+
+ if (new_num_txqs < netdev->real_num_tx_queues)
+ netif_set_real_num_tx_queues(netdev, new_num_txqs);
+
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+
+ priv->channels = *new_chs;
+
+ /* New channels are ready to roll, modify HW settings if needed */
+ if (hw_modify)
+ hw_modify(priv);
+
+ mlx5e_refresh_tirs(priv, false);
+ mlx5e_activate_priv_channels(priv);
+
+ /* return carrier back if needed */
+ if (carrier_ok)
+ netif_carrier_on(netdev);
+}
+
+void mlx5e_timestamp_init(struct mlx5e_priv *priv)
+{
+ priv->tstamp.tx_type = HWTSTAMP_TX_OFF;
+ priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
+}
+
+int mlx5e_open_locked(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ set_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ err = mlx5e_open_channels(priv, &priv->channels);
+ if (err)
+ goto err_clear_state_opened_flag;
+
+ mlx5e_refresh_tirs(priv, false);
+ mlx5e_activate_priv_channels(priv);
+ if (priv->profile->update_carrier)
+ priv->profile->update_carrier(priv);
+
+ if (priv->profile->update_stats)
+ queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
+ return 0;
+
+err_clear_state_opened_flag:
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ return err;
+}
+
+int mlx5e_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_open_locked(netdev);
+ if (!err)
+ mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
+ mutex_unlock(&priv->state_lock);
+
+ if (mlx5_vxlan_allowed(priv->mdev->vxlan))
+ udp_tunnel_get_rx_info(netdev);
+
+ return err;
+}
+
+int mlx5e_close_locked(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ /* May already be CLOSED in case a previous configuration operation
+ * (e.g RX/TX queue size change) that involves close&open failed.
+ */
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ netif_carrier_off(priv->netdev);
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+
+ return 0;
+}
+
+int mlx5e_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!netif_device_present(netdev))
+ return -ENODEV;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
+ err = mlx5e_close_locked(netdev);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq *rq,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int err;
+
+ param->wq.db_numa_node = param->wq.buf_numa_node;
+
+ err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ return err;
+
+ /* Mark as unused given "Drop-RQ" packets never reach XDP */
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+
+ rq->mdev = mdev;
+
+ return 0;
+}
+
+static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
+ struct mlx5e_cq *cq,
+ struct mlx5e_cq_param *param)
+{
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+ param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev);
+
+ return mlx5e_alloc_cq_common(mdev, param, cq);
+}
+
+static int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_cq_param cq_param = {};
+ struct mlx5e_rq_param rq_param = {};
+ struct mlx5e_cq *cq = &drop_rq->cq;
+ int err;
+
+ mlx5e_build_drop_rq_param(priv, &rq_param);
+
+ err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
+ if (err)
+ return err;
+
+ err = mlx5e_create_cq(cq, &cq_param);
+ if (err)
+ goto err_free_cq;
+
+ err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
+ if (err)
+ goto err_destroy_cq;
+
+ err = mlx5e_create_rq(drop_rq, &rq_param);
+ if (err)
+ goto err_free_rq;
+
+ err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
+
+ return 0;
+
+err_free_rq:
+ mlx5e_free_rq(drop_rq);
+
+err_destroy_cq:
+ mlx5e_destroy_cq(cq);
+
+err_free_cq:
+ mlx5e_free_cq(cq);
+
+ return err;
+}
+
+static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
+{
+ mlx5e_destroy_rq(drop_rq);
+ mlx5e_free_rq(drop_rq);
+ mlx5e_destroy_cq(&drop_rq->cq);
+ mlx5e_free_cq(&drop_rq->cq);
+}
+
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
+ u32 underlay_qpn, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, prio, tc << 1);
+ MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
+ MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
+
+ if (mlx5_lag_is_lacp_owner(mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
+
+ return mlx5_core_create_tis(mdev, in, sizeof(in), tisn);
+}
+
+void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
+{
+ mlx5_core_destroy_tis(mdev, tisn);
+}
+
+int mlx5e_create_tises(struct mlx5e_priv *priv)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < priv->profile->max_tc; tc++) {
+ err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]);
+ if (err)
+ goto err_close_tises;
+ }
+
+ return 0;
+
+err_close_tises:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+
+ return err;
+}
+
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+{
+ int tc;
+
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+}
+
+static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
+ enum mlx5e_traffic_types tt,
+ u32 *tirc)
+{
+ MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+}
+
+static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
+{
+ MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, rqtn);
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+}
+
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tir *tir;
+ void *tirc;
+ int inlen;
+ int i = 0;
+ int err;
+ u32 *in;
+ int tt;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(in, 0, inlen);
+ tir = &priv->indir_tir[tt];
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ mlx5e_build_indir_tir_ctx(priv, tt, tirc);
+ err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
+ goto err_destroy_inner_tirs;
+ }
+ }
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ goto out;
+
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
+ memset(in, 0, inlen);
+ tir = &priv->inner_indir_tir[i];
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
+ err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
+ goto err_destroy_inner_tirs;
+ }
+ }
+
+out:
+ kvfree(in);
+
+ return 0;
+
+err_destroy_inner_tirs:
+ for (i--; i >= 0; i--)
+ mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
+
+ for (tt--; tt >= 0; tt--)
+ mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
+{
+ int nch = priv->profile->max_nch(priv->mdev);
+ struct mlx5e_tir *tir;
+ void *tirc;
+ int inlen;
+ int err;
+ u32 *in;
+ int ix;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ for (ix = 0; ix < nch; ix++) {
+ memset(in, 0, inlen);
+ tir = &priv->direct_tir[ix];
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc);
+ err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+ if (err)
+ goto err_destroy_ch_tirs;
+ }
+
+ kvfree(in);
+
+ return 0;
+
+err_destroy_ch_tirs:
+ mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err);
+ for (ix--; ix >= 0; ix--)
+ mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]);
+
+ kvfree(in);
+
+ return err;
+}
+
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
+ mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
+ mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
+}
+
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
+{
+ int nch = priv->profile->max_nch(priv->mdev);
+ int i;
+
+ for (i = 0; i < nch; i++)
+ mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]);
+}
+
+static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
+ struct tc_mqprio_qopt *mqprio)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels new_channels = {};
+ u8 tc = mqprio->num_tc;
+ int err = 0;
+
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ if (tc && tc != MLX5E_MAX_NUM_TC)
+ return -EINVAL;
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.num_tc = tc ? tc : 1;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
+ new_channels.params.num_tc);
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *cls_flower,
+ int flags)
+{
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlx5e_configure_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_DESTROY:
+ return mlx5e_delete_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_STATS:
+ return mlx5e_stats_flower(priv, cls_flower, flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
+ priv, priv, f->extack);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
+ priv);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+#endif
+
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+#ifdef CONFIG_MLX5_ESWITCH
+ case TC_SETUP_BLOCK:
+ return mlx5e_setup_tc_block(dev, type_data);
+#endif
+ case TC_SETUP_QDISC_MQPRIO:
+ return mlx5e_setup_tc_mqprio(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void
+mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+ struct mlx5e_vport_stats *vstats = &priv->stats.vport;
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+
+ /* update HW stats in background for next time */
+ queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
+ if (mlx5e_is_uplink_rep(priv)) {
+ stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+ stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok);
+ stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+ stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+ } else {
+ mlx5e_grp_sw_update_stats(priv);
+ stats->rx_packets = sstats->rx_packets;
+ stats->rx_bytes = sstats->rx_bytes;
+ stats->tx_packets = sstats->tx_packets;
+ stats->tx_bytes = sstats->tx_bytes;
+ stats->tx_dropped = sstats->tx_queue_dropped;
+ }
+
+ stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
+
+ stats->rx_length_errors =
+ PPORT_802_3_GET(pstats, a_in_range_length_errors) +
+ PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
+ PPORT_802_3_GET(pstats, a_frame_too_long_errors);
+ stats->rx_crc_errors =
+ PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
+ stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
+ stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
+ stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
+ stats->rx_frame_errors;
+ stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
+
+ /* vport multicast also counts packets that are dropped due to steering
+ * or rx out of buffer
+ */
+ stats->multicast =
+ VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
+}
+
+static void mlx5e_set_rx_mode(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+}
+
+static int mlx5e_set_mac(struct net_device *netdev, void *addr)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct sockaddr *saddr = addr;
+
+ if (!is_valid_ether_addr(saddr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ netif_addr_lock_bh(netdev);
+ ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+ netif_addr_unlock_bh(netdev);
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+
+ return 0;
+}
+
+#define MLX5E_SET_FEATURE(features, feature, enable) \
+ do { \
+ if (enable) \
+ *features |= feature; \
+ else \
+ *features &= ~feature; \
+ } while (0)
+
+typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
+
+static int set_feature_lro(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ struct mlx5e_params *old_params;
+ int err = 0;
+ bool reset;
+
+ mutex_lock(&priv->state_lock);
+
+ old_params = &priv->channels.params;
+ if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ netdev_warn(netdev, "can't set LRO with legacy RQ\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ new_channels.params = *old_params;
+ new_channels.params.lro_en = enable;
+
+ if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) ==
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params))
+ reset = false;
+ }
+
+ if (!reset) {
+ *old_params = new_channels.params;
+ err = mlx5e_modify_tirs_lro(priv);
+ goto out;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (enable)
+ mlx5e_enable_cvlan_filter(priv);
+ else
+ mlx5e_disable_cvlan_filter(priv);
+
+ return 0;
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (!enable && mlx5e_tc_num_filters(priv)) {
+ netdev_err(netdev,
+ "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
+static int set_feature_rx_all(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_set_port_fcs(mdev, !enable);
+}
+
+static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+ priv->channels.params.scatter_fcs_en = enable;
+ err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable);
+ if (err)
+ priv->channels.params.scatter_fcs_en = !enable;
+
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ priv->channels.params.vlan_strip_disable = !enable;
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
+ if (err)
+ priv->channels.params.vlan_strip_disable = enable;
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+#ifdef CONFIG_MLX5_EN_ARFS
+static int set_feature_arfs(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (enable)
+ err = mlx5e_arfs_enable(priv);
+ else
+ err = mlx5e_arfs_disable(priv);
+
+ return err;
+}
+#endif
+
+static int mlx5e_handle_feature(struct net_device *netdev,
+ netdev_features_t *features,
+ netdev_features_t feature,
+ mlx5e_feature_handler feature_handler)
+{
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
+ int err;
+
+ if (!(changes & feature))
+ return 0;
+
+ err = feature_handler(netdev, enable);
+ if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
+ netdev_err(netdev, "%s feature %pNF failed, err %d\n",
+ enable ? "Enable" : "Disable", &feature, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ netdev_features_t oper_features = features;
+ int err = 0;
+
+#define MLX5E_HANDLE_FEATURE(feature, handler) \
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
+
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
+ set_feature_cvlan_filter);
+#ifdef CONFIG_MLX5_ESWITCH
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
+#endif
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
+#ifdef CONFIG_MLX5_EN_ARFS
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
+#endif
+
+ if (err) {
+ netdev->features = oper_features;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params *params;
+
+ mutex_lock(&priv->state_lock);
+ params = &priv->channels.params;
+ if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) {
+ /* HW strips the outer C-tag header, this is a problem
+ * for S-tag traffic.
+ */
+ features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ if (!params->vlan_strip_disable)
+ netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
+ }
+ if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ features &= ~NETIF_F_LRO;
+ if (params->lro_en)
+ netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
+ }
+
+ if (params->xdp_prog) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with XDP\n");
+ features &= ~NETIF_F_LRO;
+ }
+ }
+
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ features &= ~NETIF_F_RXHASH;
+ if (netdev->features & NETIF_F_RXHASH)
+ netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+ }
+
+ mutex_unlock(&priv->state_lock);
+
+ return features;
+}
+
+int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+ change_hw_mtu_cb set_mtu_cb)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels new_channels = {};
+ struct mlx5e_params *params;
+ int err = 0;
+ bool reset;
+
+ mutex_lock(&priv->state_lock);
+
+ params = &priv->channels.params;
+
+ reset = !params->lro_en;
+ reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ new_channels.params = *params;
+ new_channels.params.sw_mtu = new_mtu;
+
+ if (params->xdp_prog &&
+ !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+ netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
+ new_mtu, mlx5e_xdp_max_mtu(params));
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params);
+ u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
+ u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
+
+ reset = reset && (is_linear || (ppw_old != ppw_new));
+ }
+
+ if (!reset) {
+ params->sw_mtu = new_mtu;
+ if (set_mtu_cb)
+ set_mtu_cb(priv);
+ netdev->mtu = params->sw_mtu;
+ goto out;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb);
+ netdev->mtu = new_channels.params.sw_mtu;
+
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
+}
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
+ (mlx5_clock_get_ptp_index(priv->mdev) == -1))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* TX HW timestamp */
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ /* RX HW timestamp */
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ /* Reset CQE compression to Admin default */
+ mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ /* Disable CQE compression */
+ netdev_warn(priv->netdev, "Disabling cqe compression");
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
+ if (err) {
+ netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+ mutex_unlock(&priv->state_lock);
+ return err;
+ }
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ mutex_unlock(&priv->state_lock);
+ return -ERANGE;
+ }
+
+ memcpy(&priv->tstamp, &config, sizeof(config));
+ mutex_unlock(&priv->state_lock);
+
+ /* might need to fix some features */
+ netdev_update_features(priv->netdev);
+
+ return copy_to_user(ifr->ifr_data, &config,
+ sizeof(config)) ? -EFAULT : 0;
+}
+
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config *cfg = &priv->tstamp;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
+}
+
+static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlx5e_hwstamp_set(priv, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx5e_hwstamp_get(priv, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
+}
+
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+ __be16 vlan_proto)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (vlan_proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
+ vlan, qos);
+}
+
+static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
+}
+
+static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
+}
+
+static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
+ max_tx_rate, min_tx_rate);
+}
+
+static int mlx5_vport_link2ifla(u8 esw_link)
+{
+ switch (esw_link) {
+ case MLX5_VPORT_ADMIN_STATE_DOWN:
+ return IFLA_VF_LINK_STATE_DISABLE;
+ case MLX5_VPORT_ADMIN_STATE_UP:
+ return IFLA_VF_LINK_STATE_ENABLE;
+ }
+ return IFLA_VF_LINK_STATE_AUTO;
+}
+
+static int mlx5_ifla_link2vport(u8 ifla_link)
+{
+ switch (ifla_link) {
+ case IFLA_VF_LINK_STATE_DISABLE:
+ return MLX5_VPORT_ADMIN_STATE_DOWN;
+ case IFLA_VF_LINK_STATE_ENABLE:
+ return MLX5_VPORT_ADMIN_STATE_UP;
+ }
+ return MLX5_VPORT_ADMIN_STATE_AUTO;
+}
+
+static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
+ int link_state)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
+ mlx5_ifla_link2vport(link_state));
+}
+
+static int mlx5e_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
+ if (err)
+ return err;
+ ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
+ return 0;
+}
+
+static int mlx5e_get_vf_stats(struct net_device *dev,
+ int vf, struct ifla_vf_stats *vf_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
+ vf_stats);
+}
+#endif
+
+struct mlx5e_vxlan_work {
+ struct work_struct work;
+ struct mlx5e_priv *priv;
+ u16 port;
+};
+
+static void mlx5e_vxlan_add_work(struct work_struct *work)
+{
+ struct mlx5e_vxlan_work *vxlan_work =
+ container_of(work, struct mlx5e_vxlan_work, work);
+ struct mlx5e_priv *priv = vxlan_work->priv;
+ u16 port = vxlan_work->port;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_vxlan_add_port(priv->mdev->vxlan, port);
+ mutex_unlock(&priv->state_lock);
+
+ kfree(vxlan_work);
+}
+
+static void mlx5e_vxlan_del_work(struct work_struct *work)
+{
+ struct mlx5e_vxlan_work *vxlan_work =
+ container_of(work, struct mlx5e_vxlan_work, work);
+ struct mlx5e_priv *priv = vxlan_work->priv;
+ u16 port = vxlan_work->port;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_vxlan_del_port(priv->mdev->vxlan, port);
+ mutex_unlock(&priv->state_lock);
+ kfree(vxlan_work);
+}
+
+static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
+{
+ struct mlx5e_vxlan_work *vxlan_work;
+
+ vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
+ if (!vxlan_work)
+ return;
+
+ if (add)
+ INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
+ else
+ INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);
+
+ vxlan_work->priv = priv;
+ vxlan_work->port = port;
+ queue_work(priv->wq, &vxlan_work->work);
+}
+
+static void mlx5e_add_vxlan_port(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+ return;
+
+ mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
+}
+
+static void mlx5e_del_vxlan_port(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+ return;
+
+ mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
+}
+
+static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ unsigned int offset = 0;
+ struct udphdr *udph;
+ u8 proto;
+ u16 port;
+
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IP):
+ proto = ip_hdr(skb)->protocol;
+ break;
+ case htons(ETH_P_IPV6):
+ proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
+ break;
+ default:
+ goto out;
+ }
+
+ switch (proto) {
+ case IPPROTO_GRE:
+ return features;
+ case IPPROTO_UDP:
+ udph = udp_hdr(skb);
+ port = be16_to_cpu(udph->dest);
+
+ /* Verify if UDP port is being offloaded by HW */
+ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
+ return features;
+ }
+
+out:
+ /* Disable CSUM and GSO if the udp dport is not offloaded by HW */
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ features = vlan_features_check(skb, features);
+ features = vxlan_features_check(skb, features);
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (mlx5e_ipsec_feature_check(skb, netdev, features))
+ return features;
+#endif
+
+ /* Validate if the tunneled packet is being offloaded by HW */
+ if (skb->encapsulation &&
+ (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
+ return mlx5e_tunnel_features_check(priv, skb, features);
+
+ return features;
+}
+
+static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
+ struct mlx5e_txqsq *sq)
+{
+ struct mlx5_eq *eq = sq->cq.mcq.eq;
+ u32 eqe_count;
+
+ netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+ eq->eqn, eq->cons_index, eq->irqn);
+
+ eqe_count = mlx5_eq_poll_irq_disabled(eq);
+ if (!eqe_count)
+ return false;
+
+ netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+ sq->channel->stats->eq_rearm++;
+ return true;
+}
+
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ tx_timeout_work);
+ struct net_device *dev = priv->netdev;
+ bool reopen_channels = false;
+ int i, err;
+
+ rtnl_lock();
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
+ struct mlx5e_txqsq *sq = priv->txq2sq[i];
+
+ if (!netif_xmit_stopped(dev_queue))
+ continue;
+
+ netdev_err(dev,
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+ i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+ jiffies_to_usecs(jiffies - dev_queue->trans_start));
+
+ /* If we recover a lost interrupt, most likely TX timeout will
+ * be resolved, skip reopening channels
+ */
+ if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ reopen_channels = true;
+ }
+ }
+
+ if (!reopen_channels)
+ goto unlock;
+
+ mlx5e_close_locked(dev);
+ err = mlx5e_open_locked(dev);
+ if (err)
+ netdev_err(priv->netdev,
+ "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+ err);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ rtnl_unlock();
+}
+
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ netdev_err(dev, "TX timeout detected\n");
+ queue_work(priv->wq, &priv->tx_timeout_work);
+}
+
+static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_channels new_channels = {};
+
+ if (priv->channels.params.lro_en) {
+ netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+ return -EINVAL;
+ }
+
+ if (MLX5_IPSEC_DEV(priv->mdev)) {
+ netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+ return -EINVAL;
+ }
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.xdp_prog = prog;
+
+ if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+ netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
+ new_channels.params.sw_mtu,
+ mlx5e_xdp_max_mtu(&new_channels.params));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct bpf_prog *old_prog;
+ bool reset, was_opened;
+ int err = 0;
+ int i;
+
+ mutex_lock(&priv->state_lock);
+
+ if (prog) {
+ err = mlx5e_xdp_allowed(priv, prog);
+ if (err)
+ goto unlock;
+ }
+
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ /* no need for full reset when exchanging programs */
+ reset = (!priv->channels.params.xdp_prog || !prog);
+
+ if (was_opened && reset)
+ mlx5e_close_locked(netdev);
+ if (was_opened && !reset) {
+ /* num_channels is invariant here, so we can take the
+ * batched reference right upfront.
+ */
+ prog = bpf_prog_add(prog, priv->channels.num);
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto unlock;
+ }
+ }
+
+ /* exchange programs, extra prog reference we got from caller
+ * as long as we don't fail from this point onwards.
+ */
+ old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (reset) /* change RQ type according to priv->xdp_prog */
+ mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
+
+ if (was_opened && reset)
+ mlx5e_open_locked(netdev);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+ goto unlock;
+
+ /* exchanging programs w/o reset, we update ref counts on behalf
+ * of the channels RQs here.
+ */
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ napi_synchronize(&c->napi);
+ /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
+
+ old_prog = xchg(&c->rq.xdp_prog, prog);
+
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ /* napi_schedule in case we have missed anything */
+ napi_schedule(&c->napi);
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ /* Need to fix some features. */
+ if (!err)
+ netdev_update_features(netdev);
+
+ return err;
+}
+
+static u32 mlx5e_xdp_query(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ const struct bpf_prog *xdp_prog;
+ u32 prog_id = 0;
+
+ mutex_lock(&priv->state_lock);
+ xdp_prog = priv->channels.params.xdp_prog;
+ if (xdp_prog)
+ prog_id = xdp_prog->aux->id;
+ mutex_unlock(&priv->state_lock);
+
+ return prog_id;
+}
+
+static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return mlx5e_xdp_set(dev, xdp->prog);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = mlx5e_xdp_query(dev);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+const struct net_device_ops mlx5e_netdev_ops = {
+ .ndo_open = mlx5e_open,
+ .ndo_stop = mlx5e_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_setup_tc = mlx5e_setup_tc,
+ .ndo_select_queue = mlx5e_select_queue,
+ .ndo_get_stats64 = mlx5e_get_stats,
+ .ndo_set_rx_mode = mlx5e_set_rx_mode,
+ .ndo_set_mac_address = mlx5e_set_mac,
+ .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid,
+ .ndo_set_features = mlx5e_set_features,
+ .ndo_fix_features = mlx5e_fix_features,
+ .ndo_change_mtu = mlx5e_change_nic_mtu,
+ .ndo_do_ioctl = mlx5e_ioctl,
+ .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
+ .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_features_check = mlx5e_features_check,
+ .ndo_tx_timeout = mlx5e_tx_timeout,
+ .ndo_bpf = mlx5e_xdp,
+ .ndo_xdp_xmit = mlx5e_xdp_xmit,
+#ifdef CONFIG_MLX5_EN_ARFS
+ .ndo_rx_flow_steer = mlx5e_rx_flow_steer,
+#endif
+#ifdef CONFIG_MLX5_ESWITCH
+ /* SRIOV E-Switch NDOs */
+ .ndo_set_vf_mac = mlx5e_set_vf_mac,
+ .ndo_set_vf_vlan = mlx5e_set_vf_vlan,
+ .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk,
+ .ndo_set_vf_trust = mlx5e_set_vf_trust,
+ .ndo_set_vf_rate = mlx5e_set_vf_rate,
+ .ndo_get_vf_config = mlx5e_get_vf_config,
+ .ndo_set_vf_link_state = mlx5e_set_vf_link_state,
+ .ndo_get_vf_stats = mlx5e_get_vf_stats,
+ .ndo_has_offload_stats = mlx5e_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_get_offload_stats,
+#endif
+};
+
+static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EOPNOTSUPP;
+ if (!MLX5_CAP_GEN(mdev, eth_net_offloads) ||
+ !MLX5_CAP_GEN(mdev, nic_flow_table) ||
+ !MLX5_CAP_ETH(mdev, csum_cap) ||
+ !MLX5_CAP_ETH(mdev, max_lso_cap) ||
+ !MLX5_CAP_ETH(mdev, vlan_cap) ||
+ !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap) ||
+ MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.max_ft_level)
+ < 3) {
+ mlx5_core_warn(mdev,
+ "Not creating net device, some required device capabilities are missing\n");
+ return -EOPNOTSUPP;
+ }
+ if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+ mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+ if (!MLX5_CAP_GEN(mdev, cq_moderation))
+ mlx5_core_warn(mdev, "CQ moderation is not supported\n");
+
+ return 0;
+}
+
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+ int num_channels)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ indirection_rqt[i] = i % num_channels;
+}
+
+static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
+{
+ u32 link_speed = 0;
+ u32 pci_bw = 0;
+
+ mlx5e_port_max_linkspeed(mdev, &link_speed);
+ pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
+ mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
+ link_speed, pci_bw);
+
+#define MLX5E_SLOW_PCI_RATIO (2)
+
+ return link_speed && pci_bw &&
+ link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
+}
+
+static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+{
+ struct net_dim_cq_moder moder;
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+ struct net_dim_cq_moder moder;
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+ return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->tx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+ } else {
+ params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+ }
+
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ params->tx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->rx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+ } else {
+ params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
+ }
+
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ params->rx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
+{
+ int i;
+
+ /* The supported periods are organized in ascending order */
+ for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
+ if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
+ break;
+
+ return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
+}
+
+void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 max_channels, u16 mtu)
+{
+ u8 rx_cq_period_mode;
+
+ params->sw_mtu = mtu;
+ params->hard_mtu = MLX5E_ETH_HARD_MTU;
+ params->num_channels = max_channels;
+ params->num_tc = 1;
+
+ /* SQ */
+ params->log_sq_size = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+
+ /* set CQE compression */
+ params->rx_cqe_compress_def = false;
+ if (MLX5_CAP_GEN(mdev, cqe_compression) &&
+ MLX5_CAP_GEN(mdev, vport_group_manager))
+ params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
+
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
+
+ /* RQ */
+ /* Prefer Striding RQ, unless any of the following holds:
+ * - Striding RQ configuration is not possible/supported.
+ * - Slow PCI heuristic.
+ * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ */
+ if (!slow_pci_heuristic(mdev) &&
+ mlx5e_striding_rq_possible(mdev, params) &&
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
+ !mlx5e_rx_is_linear_skb(mdev, params)))
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+
+ /* HW LRO */
+
+ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+ params->lro_en = !slow_pci_heuristic(mdev);
+ params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+
+ /* CQ moderation params */
+ rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
+ mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+
+ /* TX inline */
+ params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
+
+ /* RSS */
+ params->rss_hfunc = ETH_RSS_HASH_XOR;
+ netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
+ mlx5e_build_default_indir_rqt(params->indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, max_channels);
+}
+
+static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+ priv->msglevel = MLX5E_MSG_LEVEL;
+ priv->max_opened_tc = 1;
+
+ mlx5e_build_nic_params(mdev, &priv->channels.params,
+ profile->max_nch(mdev), netdev->mtu);
+
+ mutex_init(&priv->state_lock);
+
+ INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
+ INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+ INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
+ INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+ mlx5e_timestamp_init(priv);
+}
+
+static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
+ if (is_zero_ether_addr(netdev->dev_addr) &&
+ !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
+ eth_hw_addr_random(netdev);
+ mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
+ }
+}
+
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+static const struct switchdev_ops mlx5e_switchdev_ops = {
+ .switchdev_port_attr_get = mlx5e_attr_get,
+};
+#endif
+
+static void mlx5e_build_nic_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ bool fcs_supported;
+ bool fcs_enabled;
+
+ SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
+
+ netdev->netdev_ops = &mlx5e_netdev_ops;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+#endif
+
+ netdev->watchdog_timeo = 15 * HZ;
+
+ netdev->ethtool_ops = &mlx5e_ethtool_ops;
+
+ netdev->vlan_features |= NETIF_F_SG;
+ netdev->vlan_features |= NETIF_F_IP_CSUM;
+ netdev->vlan_features |= NETIF_F_IPV6_CSUM;
+ netdev->vlan_features |= NETIF_F_GRO;
+ netdev->vlan_features |= NETIF_F_TSO;
+ netdev->vlan_features |= NETIF_F_TSO6;
+ netdev->vlan_features |= NETIF_F_RXCSUM;
+ netdev->vlan_features |= NETIF_F_RXHASH;
+
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
+
+ if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
+ mlx5e_check_fragmented_striding_rq_cap(mdev))
+ netdev->vlan_features |= NETIF_F_LRO;
+
+ netdev->hw_features = netdev->vlan_features;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+
+ if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+ netdev->hw_enc_features |= NETIF_F_IP_CSUM;
+ netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_enc_features |= NETIF_F_TSO;
+ netdev->hw_enc_features |= NETIF_F_TSO6;
+ netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
+ }
+
+ if (mlx5_vxlan_allowed(mdev->vxlan)) {
+ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+ }
+
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+ netdev->hw_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->gso_partial_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ }
+
+ netdev->hw_features |= NETIF_F_GSO_PARTIAL;
+ netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
+ netdev->hw_features |= NETIF_F_GSO_UDP_L4;
+ netdev->features |= NETIF_F_GSO_UDP_L4;
+
+ mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
+
+ if (fcs_supported)
+ netdev->hw_features |= NETIF_F_RXALL;
+
+ if (MLX5_CAP_ETH(mdev, scatter_fcs))
+ netdev->hw_features |= NETIF_F_RXFCS;
+
+ netdev->features = netdev->hw_features;
+ if (!priv->channels.params.lro_en)
+ netdev->features &= ~NETIF_F_LRO;
+
+ if (fcs_enabled)
+ netdev->features &= ~NETIF_F_RXALL;
+
+ if (!priv->channels.params.scatter_fcs_en)
+ netdev->features &= ~NETIF_F_RXFCS;
+
+ /* prefere CQE compression over rxhash */
+ if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ netdev->features &= ~NETIF_F_RXHASH;
+
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+ if (FT_CAP(flow_modify_en) &&
+ FT_CAP(modify_root) &&
+ FT_CAP(identified_miss_table_mode) &&
+ FT_CAP(flow_table_modify)) {
+#ifdef CONFIG_MLX5_ESWITCH
+ netdev->hw_features |= NETIF_F_HW_TC;
+#endif
+#ifdef CONFIG_MLX5_EN_ARFS
+ netdev->hw_features |= NETIF_F_NTUPLE;
+#endif
+ }
+
+ netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ mlx5e_set_netdev_dev_addr(netdev);
+
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+ if (MLX5_ESWITCH_MANAGER(mdev))
+ netdev->switchdev_ops = &mlx5e_switchdev_ops;
+#endif
+
+ mlx5e_ipsec_build_netdev(priv);
+ mlx5e_tls_build_netdev(priv);
+}
+
+static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter);
+ if (err) {
+ mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err);
+ priv->q_counter = 0;
+ }
+
+ err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter);
+ if (err) {
+ mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err);
+ priv->drop_rq_q_counter = 0;
+ }
+}
+
+static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
+{
+ if (priv->q_counter)
+ mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
+
+ if (priv->drop_rq_q_counter)
+ mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
+}
+
+static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+ err = mlx5e_ipsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
+ err = mlx5e_tls_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
+ mlx5e_build_nic_netdev(netdev);
+ mlx5e_build_tc2txq_maps(priv);
+}
+
+static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_tls_cleanup(priv);
+ mlx5e_ipsec_cleanup(priv);
+}
+
+static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5e_create_indirect_rqt(priv);
+ if (err)
+ return err;
+
+ err = mlx5e_create_direct_rqts(priv);
+ if (err)
+ goto err_destroy_indirect_rqts;
+
+ err = mlx5e_create_indirect_tirs(priv);
+ if (err)
+ goto err_destroy_direct_rqts;
+
+ err = mlx5e_create_direct_tirs(priv);
+ if (err)
+ goto err_destroy_indirect_tirs;
+
+ err = mlx5e_create_flow_steering(priv);
+ if (err) {
+ mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
+ goto err_destroy_direct_tirs;
+ }
+
+ err = mlx5e_tc_nic_init(priv);
+ if (err)
+ goto err_destroy_flow_steering;
+
+ return 0;
+
+err_destroy_flow_steering:
+ mlx5e_destroy_flow_steering(priv);
+err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+err_destroy_indirect_tirs:
+ mlx5e_destroy_indirect_tirs(priv);
+err_destroy_direct_rqts:
+ mlx5e_destroy_direct_rqts(priv);
+err_destroy_indirect_rqts:
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ return err;
+}
+
+static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
+{
+ mlx5e_tc_nic_cleanup(priv);
+ mlx5e_destroy_flow_steering(priv);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+}
+
+static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_tises(priv);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+ return err;
+ }
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_initialize(priv);
+#endif
+ return 0;
+}
+
+static void mlx5e_nic_enable(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ mlx5e_init_l2_addr(priv);
+
+ /* Marking the link as currently not needed by the Driver */
+ if (!netif_running(netdev))
+ mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
+
+ /* MTU range: 68 - hw-specific max */
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_dev_port_mtu(priv);
+
+ mlx5_lag_add(mdev, netdev);
+
+ mlx5e_enable_async_events(priv);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_register_vport_reps(priv);
+
+ if (netdev->reg_state != NETREG_REGISTERED)
+ return;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_init_app(priv);
+#endif
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+
+ rtnl_lock();
+ if (netif_running(netdev))
+ mlx5e_open(netdev);
+ netif_device_attach(netdev);
+ rtnl_unlock();
+}
+
+static void mlx5e_nic_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (priv->netdev->reg_state == NETREG_REGISTERED)
+ mlx5e_dcbnl_delete_app(priv);
+#endif
+
+ rtnl_lock();
+ if (netif_running(priv->netdev))
+ mlx5e_close(priv->netdev);
+ netif_device_detach(priv->netdev);
+ rtnl_unlock();
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_unregister_vport_reps(priv);
+
+ mlx5e_disable_async_events(priv);
+ mlx5_lag_remove(mdev);
+}
+
+static const struct mlx5e_profile mlx5e_nic_profile = {
+ .init = mlx5e_nic_init,
+ .cleanup = mlx5e_nic_cleanup,
+ .init_rx = mlx5e_init_nic_rx,
+ .cleanup_rx = mlx5e_cleanup_nic_rx,
+ .init_tx = mlx5e_init_nic_tx,
+ .cleanup_tx = mlx5e_cleanup_nic_tx,
+ .enable = mlx5e_nic_enable,
+ .disable = mlx5e_nic_disable,
+ .update_stats = mlx5e_update_ndo_stats,
+ .max_nch = mlx5e_get_max_num_channels,
+ .update_carrier = mlx5e_update_carrier,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = MLX5E_MAX_NUM_TC,
+};
+
+/* mlx5e generic netdev management API (move to en_common.c) */
+
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ int nch = profile->max_nch(mdev);
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+
+ netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
+ nch * profile->max_tc,
+ nch);
+ if (!netdev) {
+ mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
+ return NULL;
+ }
+
+#ifdef CONFIG_MLX5_EN_ARFS
+ netdev->rx_cpu_rmap = mdev->rmap;
+#endif
+
+ profile->init(mdev, netdev, profile, ppriv);
+
+ netif_carrier_off(netdev);
+
+ priv = netdev_priv(netdev);
+
+ priv->wq = create_singlethread_workqueue("mlx5e");
+ if (!priv->wq)
+ goto err_cleanup_nic;
+
+ return netdev;
+
+err_cleanup_nic:
+ if (profile->cleanup)
+ profile->cleanup(priv);
+ free_netdev(netdev);
+
+ return NULL;
+}
+
+int mlx5e_attach_netdev(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ const struct mlx5e_profile *profile;
+ int max_nch;
+ int err;
+
+ profile = priv->profile;
+ clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
+
+ /* max number of channels may have changed */
+ max_nch = mlx5e_get_max_num_channels(priv->mdev);
+ if (priv->channels.params.num_channels > max_nch) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
+ priv->channels.params.num_channels = max_nch;
+ mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, max_nch);
+ }
+
+ err = profile->init_tx(priv);
+ if (err)
+ goto out;
+
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
+ err = profile->init_rx(priv);
+ if (err)
+ goto err_close_drop_rq;
+
+ if (profile->enable)
+ profile->enable(priv);
+
+ return 0;
+
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
+ profile->cleanup_tx(priv);
+
+out:
+ return err;
+}
+
+void mlx5e_detach_netdev(struct mlx5e_priv *priv)
+{
+ const struct mlx5e_profile *profile = priv->profile;
+
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+
+ if (profile->disable)
+ profile->disable(priv);
+ flush_workqueue(priv->wq);
+
+ profile->cleanup_rx(priv);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
+ profile->cleanup_tx(priv);
+ cancel_delayed_work_sync(&priv->update_stats_work);
+}
+
+void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
+{
+ const struct mlx5e_profile *profile = priv->profile;
+ struct net_device *netdev = priv->netdev;
+
+ destroy_workqueue(priv->wq);
+ if (profile->cleanup)
+ profile->cleanup(priv);
+ free_netdev(netdev);
+}
+
+/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying
+ * hardware contexts and to connect it to the current netdev.
+ */
+static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ struct net_device *netdev = priv->netdev;
+ int err;
+
+ if (netif_device_present(netdev))
+ return 0;
+
+ err = mlx5e_create_mdev_resources(mdev);
+ if (err)
+ return err;
+
+ err = mlx5e_attach_netdev(priv);
+ if (err) {
+ mlx5e_destroy_mdev_resources(mdev);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ struct net_device *netdev = priv->netdev;
+
+ if (!netif_device_present(netdev))
+ return;
+
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_mdev_resources(mdev);
+}
+
+static void *mlx5e_add(struct mlx5_core_dev *mdev)
+{
+ struct net_device *netdev;
+ void *rpriv = NULL;
+ void *priv;
+ int err;
+
+ err = mlx5e_check_required_hca_cap(mdev);
+ if (err)
+ return NULL;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_ESWITCH_MANAGER(mdev)) {
+ rpriv = mlx5e_alloc_nic_rep_priv(mdev);
+ if (!rpriv) {
+ mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
+ return NULL;
+ }
+ }
+#endif
+
+ netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv);
+ if (!netdev) {
+ mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
+ goto err_free_rpriv;
+ }
+
+ priv = netdev_priv(netdev);
+
+ err = mlx5e_attach(mdev, priv);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
+ goto err_destroy_netdev;
+ }
+
+ err = register_netdev(netdev);
+ if (err) {
+ mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
+ goto err_detach;
+ }
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_init_app(priv);
+#endif
+ return priv;
+
+err_detach:
+ mlx5e_detach(mdev, priv);
+err_destroy_netdev:
+ mlx5e_destroy_netdev(priv);
+err_free_rpriv:
+ kfree(rpriv);
+ return NULL;
+}
+
+static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ void *ppriv = priv->ppriv;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_delete_app(priv);
+#endif
+ unregister_netdev(priv->netdev);
+ mlx5e_detach(mdev, vpriv);
+ mlx5e_destroy_netdev(priv);
+ kfree(ppriv);
+}
+
+static void *mlx5e_get_netdev(void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ return priv->netdev;
+}
+
+static struct mlx5_interface mlx5e_interface = {
+ .add = mlx5e_add,
+ .remove = mlx5e_remove,
+ .attach = mlx5e_attach,
+ .detach = mlx5e_detach,
+ .event = mlx5e_async_event,
+ .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
+ .get_dev = mlx5e_get_netdev,
+};
+
+void mlx5e_init(void)
+{
+ mlx5e_ipsec_build_inverse_table();
+ mlx5e_build_ptys2ethtool_map();
+ mlx5_register_interface(&mlx5e_interface);
+}
+
+void mlx5e_cleanup(void)
+{
+ mlx5_unregister_interface(&mlx5e_interface);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
new file mode 100644
index 000000000..1ab40d622
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -0,0 +1,1295 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/mlx5/fs.h>
+#include <net/switchdev.h>
+#include <net/pkt_cls.h>
+#include <net/act_api.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+
+#include "eswitch.h"
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+#include "fs_core.h"
+
+#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
+ max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_LOG_RQ_SIZE \
+ max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)
+
+static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
+
+static void mlx5e_rep_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
+ sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
+}
+
+static const struct counter_desc sw_rep_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+};
+
+struct vport_stats {
+ u64 vport_rx_packets;
+ u64 vport_tx_packets;
+ u64 vport_rx_bytes;
+ u64 vport_tx_bytes;
+};
+
+static const struct counter_desc vport_rep_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) },
+};
+
+#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
+#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc)
+
+static void mlx5e_rep_get_strings(struct net_device *dev,
+ u32 stringset, uint8_t *data)
+{
+ int i, j;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
+ strcpy(data + (i * ETH_GSTRING_LEN),
+ sw_rep_stats_desc[i].format);
+ for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++)
+ strcpy(data + (i * ETH_GSTRING_LEN),
+ vport_rep_stats_desc[j].format);
+ break;
+ }
+}
+
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct rtnl_link_stats64 *vport_stats;
+ struct ifla_vf_stats vf_stats;
+ int err;
+
+ err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
+ if (err) {
+ pr_warn("vport %d error %d reading stats\n", rep->vport, err);
+ return;
+ }
+
+ vport_stats = &priv->stats.vf_vport;
+ /* flip tx/rx as we are reporting the counters for the switch vport */
+ vport_stats->rx_packets = vf_stats.tx_packets;
+ vport_stats->rx_bytes = vf_stats.tx_bytes;
+ vport_stats->tx_packets = vf_stats.rx_packets;
+ vport_stats->tx_bytes = vf_stats.rx_bytes;
+}
+
+static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_sw_stats *s = &priv->stats.sw;
+ struct mlx5e_rq_stats *rq_stats;
+ struct mlx5e_sq_stats *sq_stats;
+ int i, j;
+
+ memset(s, 0, sizeof(*s));
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ rq_stats = c->rq.stats;
+
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+
+ for (j = 0; j < priv->channels.params.num_tc; j++) {
+ sq_stats = c->sq[j].stats;
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_queue_dropped += sq_stats->dropped;
+ }
+ }
+}
+
+static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int i, j;
+
+ if (!data)
+ return;
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_rep_update_sw_counters(priv);
+ mlx5e_rep_update_hw_counters(priv);
+ mutex_unlock(&priv->state_lock);
+
+ for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
+ data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
+ sw_rep_stats_desc, i);
+
+ for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++)
+ data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport,
+ vport_rep_stats_desc, j);
+}
+
+static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return NUM_VPORT_REP_SW_COUNTERS + NUM_VPORT_REP_HW_COUNTERS;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+ .get_drvinfo = mlx5e_rep_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_rep_get_strings,
+ .get_sset_count = mlx5e_rep_get_sset_count,
+ .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+};
+
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return -EOPNOTSUPP;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+ attr->u.ppid.id_len = ETH_ALEN;
+ ether_addr_copy(attr->u.ppid.id, rep->hw_id);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_sq *rep_sq, *tmp;
+ struct mlx5e_rep_priv *rpriv;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ list_del(&rep_sq->list);
+ kfree(rep_sq);
+ }
+}
+
+static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ u32 *sqns_array, int sqns_num)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+ int err;
+ int i;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return 0;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ for (i = 0; i < sqns_num; i++) {
+ rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
+ if (!rep_sq) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ /* Add re-inject rule to the PF/representor sqs */
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
+ rep->vport,
+ sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule = flow_rule;
+ list_add(&rep_sq->list, &rpriv->vport_sqs_list);
+ }
+ return 0;
+
+out_err:
+ mlx5e_sqs2vport_stop(esw, rep);
+ return err;
+}
+
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5e_channel *c;
+ int n, tc, num_sqs = 0;
+ int err = -ENOMEM;
+ u32 *sqs;
+
+ sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL);
+ if (!sqs)
+ goto out;
+
+ for (n = 0; n < priv->channels.num; n++) {
+ c = priv->channels.c[n];
+ for (tc = 0; tc < c->num_tc; tc++)
+ sqs[num_sqs++] = c->sq[tc].sqn;
+ }
+
+ err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
+ kfree(sqs);
+
+out:
+ if (err)
+ netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err);
+ return err;
+}
+
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ mlx5e_sqs2vport_stop(esw, rep);
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms,
+ DELAY_PROBE_TIME);
+#else
+ unsigned long ipv6_interval = ~0UL;
+#endif
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
+ DELAY_PROBE_TIME);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ refcount_inc(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt))
+ kfree(nhe);
+}
+
+static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+
+ ASSERT_RTNL();
+
+ if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_tc_encap_flows_del(priv, e);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+
+ mlx5e_tc_encap_flows_add(priv, e);
+ }
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct mlx5e_neigh_hash_entry *nhe =
+ container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
+ struct neighbour *n = nhe->n;
+ struct mlx5e_encap_entry *e;
+ unsigned char ha[ETH_ALEN];
+ struct mlx5e_priv *priv;
+ bool neigh_connected;
+ bool encap_connected;
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and it's hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ priv = netdev_priv(e->out_dev);
+
+ if (encap_connected != neigh_connected ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+ }
+ mlx5e_rep_neigh_entry_release(nhe);
+ rtnl_unlock();
+ neigh_release(n);
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct mlx5e_neigh m_neigh = {};
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != &nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ m_neigh.dev = n->dev;
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ /* We are in atomic context and can't take RTNL mutex, so use
+ * spin_lock_bh to lookup the neigh table. bh is used since
+ * netevent can be called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ if (!nhe) {
+ spin_unlock_bh(&neigh_update->encap_lock);
+ return NOTIFY_DONE;
+ }
+
+ /* This assignment is valid as long as the the neigh reference
+ * is taken
+ */
+ nhe->n = n;
+
+ /* Take a reference to ensure the neighbour and mlx5 encap
+ * entry won't be destructed until we drop the reference in
+ * delayed work.
+ */
+ neigh_hold(n);
+ mlx5e_rep_neigh_entry_hold(nhe);
+
+ if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+ mlx5e_rep_neigh_entry_release(nhe);
+ neigh_release(n);
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check the device is present since we don't care about
+ * changes in the default table, we only care about changes
+ * done per device delay prob time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ /* We are in atomic context and can't take RTNL mutex,
+ * so use spin_lock_bh to walk the neigh list and look for
+ * the relevant device. bh is used since netevent can be
+ * called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) {
+ if (p->dev == nhe->m_neigh.dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ spin_lock_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+ if (err)
+ goto out_err;
+ return 0;
+
+out_err:
+ rhashtable_destroy(&neigh_update->neigh_ht);
+ return err;
+}
+
+static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ spin_lock_bh(&rpriv->neigh_update.encap_lock);
+
+ list_del(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ spin_unlock_bh(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under RTNL lock or under the
+ * representor's encap_lock in case RTNL mutex can't be held.
+ */
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+}
+
+static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ /* The neigh hash entry must be removed from the hash table regardless
+ * of the reference count value, so it won't be found by the next
+ * neigh notification call. The neigh hash entry reference count is
+ * incremented only during creation and neigh notification calls and
+ * protects from freeing the nhe struct.
+ */
+ mlx5e_rep_neigh_entry_remove(priv, nhe);
+ mlx5e_rep_neigh_entry_release(nhe);
+}
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ if (err)
+ return err;
+ }
+ list_add(&e->encap_list, &nhe->encap_list);
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ list_del(&e->encap_list);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+
+ if (list_empty(&nhe->encap_list))
+ mlx5e_rep_neigh_entry_destroy(priv, nhe);
+}
+
+static int mlx5e_rep_open(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_open_locked(dev);
+ if (err)
+ goto unlock;
+
+ if (!mlx5_modify_vport_admin_state(priv->mdev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
+ netif_carrier_on(dev);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_rep_close(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int ret;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_modify_vport_admin_state(priv->mdev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
+ ret = mlx5e_close_locked(dev);
+ mutex_unlock(&priv->state_lock);
+ return ret;
+}
+
+static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
+ char *buf, size_t len)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int ret;
+
+ ret = snprintf(buf, len, "%d", rep->vport - 1);
+ if (ret >= len)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *cls_flower, int flags)
+{
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlx5e_configure_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_DESTROY:
+ return mlx5e_delete_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_STATS:
+ return mlx5e_stats_flower(priv, cls_flower, flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
+ priv, priv, f->extack);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_setup_tc_block(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+
+ if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ return false;
+
+ rep = rpriv->rep;
+ if (esw->mode == SRIOV_OFFLOADS &&
+ rep && rep->vport == FDB_UPLINK_VPORT)
+ return true;
+
+ return false;
+}
+
+static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+
+ if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ return false;
+
+ rep = rpriv->rep;
+ if (rep && rep->vport != FDB_UPLINK_VPORT)
+ return true;
+
+ return false;
+}
+
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
+ return true;
+ }
+
+ return false;
+}
+
+static int
+mlx5e_get_sw_stats64(const struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+ mlx5e_rep_update_sw_counters(priv);
+
+ stats->rx_packets = sstats->rx_packets;
+ stats->rx_bytes = sstats->rx_bytes;
+ stats->tx_packets = sstats->tx_packets;
+ stats->tx_bytes = sstats->tx_bytes;
+
+ stats->tx_dropped = sstats->tx_queue_dropped;
+
+ return 0;
+}
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
+{
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ return mlx5e_get_sw_stats64(dev, sp);
+ }
+
+ return -EINVAL;
+}
+
+static void
+mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ /* update HW stats in background for next time */
+ queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
+ memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
+}
+
+static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
+ .switchdev_port_attr_get = mlx5e_attr_get,
+};
+
+static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, NULL);
+}
+
+static const struct net_device_ops mlx5e_netdev_ops_rep = {
+ .ndo_open = mlx5e_rep_open,
+ .ndo_stop = mlx5e_rep_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
+ .ndo_setup_tc = mlx5e_rep_setup_tc,
+ .ndo_get_stats64 = mlx5e_rep_get_stats,
+ .ndo_has_offload_stats = mlx5e_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_get_offload_stats,
+ .ndo_change_mtu = mlx5e_change_rep_mtu,
+};
+
+static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 mtu)
+{
+ u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+
+ params->hard_mtu = MLX5E_ETH_HARD_MTU;
+ params->sw_mtu = mtu;
+ params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
+
+ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
+
+ params->num_tc = 1;
+ params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+}
+
+static void mlx5e_build_rep_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+
+ netdev->watchdog_timeo = 15 * HZ;
+
+ netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
+
+ netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
+
+ netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
+ netdev->hw_features |= NETIF_F_HW_TC;
+
+ eth_hw_addr_random(netdev);
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+}
+
+static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+
+ mutex_init(&priv->state_lock);
+
+ INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+ priv->channels.params.num_channels = profile->max_nch(mdev);
+
+ mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
+ mlx5e_build_rep_netdev(netdev);
+
+ mlx5e_timestamp_init(priv);
+}
+
+static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+ int err;
+
+ mlx5e_init_l2_addr(priv);
+
+ err = mlx5e_create_direct_rqts(priv);
+ if (err)
+ return err;
+
+ err = mlx5e_create_direct_tirs(priv);
+ if (err)
+ goto err_destroy_direct_rqts;
+
+ flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
+ rep->vport,
+ priv->direct_tir[0].tirn);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto err_destroy_direct_tirs;
+ }
+ rpriv->vport_rx_rule = flow_rule;
+
+ return 0;
+
+err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+err_destroy_direct_rqts:
+ mlx5e_destroy_direct_rqts(priv);
+ return err;
+}
+
+static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_rqts(priv);
+}
+
+static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_tises(priv);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+ return err;
+ }
+ return 0;
+}
+
+static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev)
+{
+#define MLX5E_PORT_REPRESENTOR_NCH 1
+ return MLX5E_PORT_REPRESENTOR_NCH;
+}
+
+static const struct mlx5e_profile mlx5e_rep_profile = {
+ .init = mlx5e_init_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_nic_tx,
+ .update_stats = mlx5e_rep_update_hw_counters,
+ .max_nch = mlx5e_get_rep_max_num_channels,
+ .update_carrier = NULL,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */,
+ .max_tc = 1,
+};
+
+/* e-Switch vport representors */
+
+static int
+mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ int err;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_add_sqs_fwd_rules(priv);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err)
+ goto err_remove_sqs;
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(&rpriv->tc_ht);
+ if (err)
+ goto err_neigh_cleanup;
+
+ return 0;
+
+err_neigh_cleanup:
+ mlx5e_rep_neigh_cleanup(rpriv);
+err_remove_sqs:
+ mlx5e_remove_sqs_fwd_rules(priv);
+ return err;
+}
+
+static void
+mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_remove_sqs_fwd_rules(priv);
+
+ /* clean uplink offloaded TC rules, delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
+
+ mlx5e_rep_neigh_cleanup(rpriv);
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *netdev;
+ struct mlx5e_priv *upriv;
+ int err;
+
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return -ENOMEM;
+
+ netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, rpriv);
+ if (!netdev) {
+ pr_warn("Failed to create representor netdev for vport %d\n",
+ rep->vport);
+ kfree(rpriv);
+ return -EINVAL;
+ }
+
+ rpriv->netdev = netdev;
+ rpriv->rep = rep;
+ rep->rep_if[REP_ETH].priv = rpriv;
+ INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+
+ err = mlx5e_attach_netdev(netdev_priv(netdev));
+ if (err) {
+ pr_warn("Failed to attach representor netdev for vport %d\n",
+ rep->vport);
+ goto err_destroy_netdev;
+ }
+
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err) {
+ pr_warn("Failed to initialized neighbours handling for vport %d\n",
+ rep->vport);
+ goto err_detach_netdev;
+ }
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
+ upriv = netdev_priv(uplink_rpriv->netdev);
+ err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
+ upriv);
+ if (err)
+ goto err_neigh_cleanup;
+
+ err = register_netdev(netdev);
+ if (err) {
+ pr_warn("Failed to register representor netdev for vport %d\n",
+ rep->vport);
+ goto err_egdev_cleanup;
+ }
+
+ return 0;
+
+err_egdev_cleanup:
+ tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
+ upriv);
+
+err_neigh_cleanup:
+ mlx5e_rep_neigh_cleanup(rpriv);
+
+err_detach_netdev:
+ mlx5e_detach_netdev(netdev_priv(netdev));
+
+err_destroy_netdev:
+ mlx5e_destroy_netdev(netdev_priv(netdev));
+ kfree(rpriv);
+ return err;
+}
+
+static void
+mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *uplink_rpriv;
+ void *ppriv = priv->ppriv;
+ struct mlx5e_priv *upriv;
+
+ unregister_netdev(netdev);
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
+ REP_ETH);
+ upriv = netdev_priv(uplink_rpriv->netdev);
+ tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
+ upriv);
+ mlx5e_rep_neigh_cleanup(rpriv);
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_netdev(priv);
+ kfree(ppriv); /* mlx5e_rep_priv */
+}
+
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
+static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5e_vport_rep_load;
+ rep_if.unload = mlx5e_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
+ }
+}
+
+static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
+}
+
+void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if;
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = priv->ppriv;
+ rpriv->netdev = priv->netdev;
+
+ rep_if.load = mlx5e_nic_rep_load;
+ rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
+ rep_if.priv = rpriv;
+ INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
+
+ mlx5e_rep_register_vf_vports(priv); /* VFs vports */
+}
+
+void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
+}
+
+void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return NULL;
+
+ rpriv->rep = &esw->offloads.vport_reps[0];
+ return rpriv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
new file mode 100644
index 000000000..844d32d5c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_REP_H__
+#define __MLX5E_REP_H__
+
+#include <net/ip_tunnels.h>
+#include <linux/rhashtable.h>
+#include "eswitch.h"
+#include "en.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+struct mlx5e_neigh_update_table {
+ struct rhashtable neigh_ht;
+ /* Save the neigh hash entries in a list in addition to the hash table
+ * (neigh_ht). In order to iterate easily over the neigh entries.
+ * Used for stats query.
+ */
+ struct list_head neigh_list;
+ /* protect lookup/remove operations */
+ spinlock_t encap_lock;
+ struct notifier_block netevent_nb;
+ struct delayed_work neigh_stats_work;
+ unsigned long min_interval; /* jiffies */
+};
+
+struct mlx5e_rep_priv {
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5e_neigh_update_table neigh_update;
+ struct net_device *netdev;
+ struct mlx5_flow_handle *vport_rx_rule;
+ struct list_head vport_sqs_list;
+ struct rhashtable tc_ht; /* valid for uplink rep */
+};
+
+static inline
+struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv;
+}
+
+struct mlx5e_neigh {
+ struct net_device *dev;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip;
+ int family;
+};
+
+struct mlx5e_neigh_hash_entry {
+ struct rhash_head rhash_node;
+ struct mlx5e_neigh m_neigh;
+
+ /* Save the neigh hash entry in a list on the representor in
+ * addition to the hash table. In order to iterate easily over the
+ * neighbour entries. Used for stats query.
+ */
+ struct list_head neigh_list;
+
+ /* encap list sharing the same neigh */
+ struct list_head encap_list;
+
+ /* valid only when the neigh reference is taken during
+ * neigh_update_work workqueue callback.
+ */
+ struct neighbour *n;
+ struct work_struct neigh_update_work;
+
+ /* neigh hash entry can be deleted only when the refcount is zero.
+ * refcount is needed to avoid neigh hash entry removal by TC, while
+ * it's used by the neigh notification call.
+ */
+ refcount_t refcnt;
+
+ /* Save the last reported time offloaded trafic pass over one of the
+ * neigh hash entry flows. Use it to periodically update the neigh
+ * 'used' value and avoid neigh deleting by the kernel.
+ */
+ unsigned long reported_lastuse;
+};
+
+enum {
+ /* set when the encap entry is successfully offloaded into HW */
+ MLX5_ENCAP_ENTRY_VALID = BIT(0),
+};
+
+struct mlx5e_encap_entry {
+ /* neigh hash entry list of encaps sharing the same neigh */
+ struct list_head encap_list;
+ struct mlx5e_neigh m_neigh;
+ /* a node of the eswitch encap hash table which keeping all the encap
+ * entries
+ */
+ struct hlist_node encap_hlist;
+ struct list_head flows;
+ u32 encap_id;
+ struct ip_tunnel_info tun_info;
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+
+ struct net_device *out_dev;
+ int tunnel_type;
+ u8 flags;
+ char *encap_header;
+ int encap_size;
+};
+
+struct mlx5e_rep_sq {
+ struct mlx5_flow_handle *send_to_vport_rule;
+ struct list_head list;
+};
+
+void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
+void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
+void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp);
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
+
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {}
+static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {}
+static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
+static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
new file mode 100644
index 000000000..9d86e49a7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -0,0 +1,1470 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/prefetch.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/busy_poll.h>
+#include <net/ip6_checksum.h>
+#include <net/page_pool.h>
+#include <net/inet_ecn.h>
+#include "en.h"
+#include "en_tc.h"
+#include "eswitch.h"
+#include "en_rep.h"
+#include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls_rxtx.h"
+#include "lib/clock.h"
+#include "en/xdp.h"
+
+static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
+{
+ return config->rx_filter == HWTSTAMP_FILTER_ALL;
+}
+
+static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
+ void *data)
+{
+ u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc);
+
+ memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
+}
+
+static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq, u32 cqcc)
+{
+ mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
+ cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt);
+ cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
+ rq->stats->cqe_compress_blks++;
+}
+
+static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
+{
+ mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
+ cq->mini_arr_idx = 0;
+}
+
+static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
+{
+ struct mlx5_cqwq *wq = &cq->wq;
+
+ u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
+ u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
+ u32 wq_sz = mlx5_cqwq_get_size(wq);
+ u32 ci_top = min_t(u32, wq_sz, ci + n);
+
+ for (; ci < ci_top; ci++, n--) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+ cqe->op_own = op_own;
+ }
+
+ if (unlikely(ci == wq_sz)) {
+ op_own = !op_own;
+ for (ci = 0; ci < n; ci++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+ cqe->op_own = op_own;
+ }
+ }
+}
+
+static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq, u32 cqcc)
+{
+ cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
+ cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
+ cq->title.op_own &= 0xf0;
+ cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
+ cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
+
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ cq->decmprs_wqe_counter +=
+ mpwrq_get_cqe_consumed_strides(&cq->title);
+ else
+ cq->decmprs_wqe_counter =
+ mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1);
+}
+
+static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq, u32 cqcc)
+{
+ mlx5e_decompress_cqe(rq, cq, cqcc);
+ cq->title.rss_hash_type = 0;
+ cq->title.rss_hash_result = 0;
+}
+
+static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq,
+ int update_owner_only,
+ int budget_rem)
+{
+ u32 cqcc = cq->wq.cc + update_owner_only;
+ u32 cqe_count;
+ u32 i;
+
+ cqe_count = min_t(u32, cq->decmprs_left, budget_rem);
+
+ for (i = update_owner_only; i < cqe_count;
+ i++, cq->mini_arr_idx++, cqcc++) {
+ if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
+ mlx5e_read_mini_arr_slot(cq, cqcc);
+
+ mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
+ rq->handle_rx_cqe(rq, &cq->title);
+ }
+ mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
+ cq->wq.cc = cqcc;
+ cq->decmprs_left -= cqe_count;
+ rq->stats->cqe_compress_pkts += cqe_count;
+
+ return cqe_count;
+}
+
+static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq,
+ int budget_rem)
+{
+ mlx5e_read_title_slot(rq, cq, cq->wq.cc);
+ mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
+ mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
+ rq->handle_rx_cqe(rq, &cq->title);
+ cq->mini_arr_idx++;
+
+ return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
+}
+
+static inline bool mlx5e_page_is_reserved(struct page *page)
+{
+ return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id();
+}
+
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct mlx5e_page_cache *cache = &rq->page_cache;
+ u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ if (tail_next == cache->head) {
+ stats->cache_full++;
+ return false;
+ }
+
+ if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
+ stats->cache_waive++;
+ return false;
+ }
+
+ cache->page_cache[cache->tail] = *dma_info;
+ cache->tail = tail_next;
+ return true;
+}
+
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct mlx5e_page_cache *cache = &rq->page_cache;
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ if (unlikely(cache->head == cache->tail)) {
+ stats->cache_empty++;
+ return false;
+ }
+
+ if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+ stats->cache_busy++;
+ return false;
+ }
+
+ *dma_info = cache->page_cache[cache->head];
+ cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+ stats->cache_reuse++;
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ return true;
+}
+
+static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ if (mlx5e_rx_cache_get(rq, dma_info))
+ return 0;
+
+ dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
+ if (unlikely(!dma_info->page))
+ return -ENOMEM;
+
+ dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
+ PAGE_SIZE, rq->buff.map_dir);
+ if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
+ put_page(dma_info->page);
+ dma_info->page = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
+{
+ dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
+}
+
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+ bool recycle)
+{
+ if (likely(recycle)) {
+ if (mlx5e_rx_cache_put(rq, dma_info))
+ return;
+
+ mlx5e_page_dma_unmap(rq, dma_info);
+ page_pool_recycle_direct(rq->page_pool, dma_info->page);
+ } else {
+ mlx5e_page_dma_unmap(rq, dma_info);
+ put_page(dma_info->page);
+ }
+}
+
+static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *frag)
+{
+ int err = 0;
+
+ if (!frag->offset)
+ /* On first frag (offset == 0), replenish page (dma_info actually).
+ * Other frags that point to the same dma_info (with a different
+ * offset) should just use the new one without replenishing again
+ * by themselves.
+ */
+ err = mlx5e_page_alloc_mapped(rq, frag->di);
+
+ return err;
+}
+
+static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *frag,
+ bool recycle)
+{
+ if (frag->last_in_page)
+ mlx5e_page_release(rq, frag->di, recycle);
+}
+
+static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
+{
+ return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags];
+}
+
+static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
+ u16 ix)
+{
+ struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix);
+ int err;
+ int i;
+
+ for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
+ err = mlx5e_get_rx_frag(rq, frag);
+ if (unlikely(err))
+ goto free_frags;
+
+ wqe->data[i].addr = cpu_to_be64(frag->di->addr +
+ frag->offset + rq->buff.headroom);
+ }
+
+ return 0;
+
+free_frags:
+ while (--i >= 0)
+ mlx5e_put_rx_frag(rq, --frag, true);
+
+ return err;
+}
+
+static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi,
+ bool recycle)
+{
+ int i;
+
+ for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
+ mlx5e_put_rx_frag(rq, wi, recycle);
+}
+
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
+
+ mlx5e_free_rx_wqe(rq, wi, false);
+}
+
+static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int err;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
+
+ err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i);
+ if (unlikely(err))
+ goto free_wqes;
+ }
+
+ return 0;
+
+free_wqes:
+ while (--i >= 0)
+ mlx5e_dealloc_rx_wqe(rq, ix + i);
+
+ return err;
+}
+
+static inline void
+mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct mlx5e_dma_info *di, u32 frag_offset, u32 len,
+ unsigned int truesize)
+{
+ dma_sync_single_for_cpu(rq->pdev,
+ di->addr + frag_offset,
+ len, DMA_FROM_DEVICE);
+ page_ref_inc(di->page);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ di->page, frag_offset, len, truesize);
+}
+
+static inline void
+mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
+ struct mlx5e_dma_info *dma_info,
+ int offset_from, int offset_to, u32 headlen)
+{
+ const void *from = page_address(dma_info->page) + offset_from;
+ /* Aligning len to sizeof(long) optimizes memcpy performance */
+ unsigned int len = ALIGN(headlen, sizeof(long));
+
+ dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
+ DMA_FROM_DEVICE);
+ skb_copy_to_linear_data_offset(skb, offset_to, from, len);
+}
+
+static inline void
+mlx5e_copy_skb_header_mpwqe(struct device *pdev,
+ struct sk_buff *skb,
+ struct mlx5e_dma_info *dma_info,
+ u32 offset, u32 headlen)
+{
+ u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
+
+ mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg);
+
+ if (unlikely(offset + headlen > PAGE_SIZE)) {
+ dma_info++;
+ mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg,
+ headlen - headlen_pg);
+ }
+}
+
+static void
+mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
+{
+ const bool no_xdp_xmit =
+ bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
+ int i;
+
+ for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ mlx5e_page_release(rq, &dma_info[i], recycle);
+}
+
+static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+ struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
+
+ rq->mpwqe.umr_in_progress = false;
+
+ mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ dma_wmb();
+
+ mlx5_wq_ll_update_db_record(wq);
+}
+
+static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
+{
+ return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+}
+
+static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
+ struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
+
+ edge_wi = wi + nnops;
+
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->opcode = MLX5_OPCODE_NOP;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+}
+
+static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+ struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
+ struct mlx5e_icosq *sq = &rq->channel->icosq;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
+ u16 pi, contig_wqebbs_room;
+ int err;
+ int i;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+ mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2))
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe,
+ offsetof(struct mlx5e_umr_wqe, inline_mtts));
+
+ for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
+ err = mlx5e_page_alloc_mapped(rq, dma_info);
+ if (unlikely(err))
+ goto err_unmap;
+ umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ wi->consumed_strides = 0;
+
+ rq->mpwqe.umr_in_progress = true;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_UMR);
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
+
+ sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+ sq->pc += MLX5E_UMR_WQEBBS;
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
+
+ return 0;
+
+err_unmap:
+ while (--i >= 0) {
+ dma_info--;
+ mlx5e_page_release(rq, dma_info, true);
+ }
+ rq->stats->buff_alloc_err++;
+
+ return err;
+}
+
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+ /* Don't recycle, this function is called on rq/netdev close */
+ mlx5e_free_rx_mpwqe(rq, wi, false);
+}
+
+bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ u8 wqe_bulk;
+ int err;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return false;
+
+ wqe_bulk = rq->wqe.info.wqe_bulk;
+
+ if (mlx5_wq_cyc_missing(wq) < wqe_bulk)
+ return false;
+
+ do {
+ u16 head = mlx5_wq_cyc_get_head(wq);
+
+ err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
+ if (unlikely(err)) {
+ rq->stats->buff_alloc_err++;
+ break;
+ }
+
+ mlx5_wq_cyc_push_n(wq, wqe_bulk);
+ } while (mlx5_wq_cyc_missing(wq) >= wqe_bulk);
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ dma_wmb();
+
+ mlx5_wq_cyc_update_db_record(wq);
+
+ return !!err;
+}
+
+static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
+ struct mlx5e_icosq *sq,
+ struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(cq->channel->netdev,
+ "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
+ return;
+ }
+
+ if (likely(icowi->opcode == MLX5_OPCODE_UMR)) {
+ mlx5e_post_rx_mpwqe(rq);
+ return;
+ }
+
+ if (unlikely(icowi->opcode != MLX5_OPCODE_NOP))
+ netdev_WARN_ONCE(cq->channel->netdev,
+ "Bad OPCODE in ICOSQ WQE info: 0x%x\n", icowi->opcode);
+}
+
+static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+{
+ struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
+ struct mlx5_cqe64 *cqe;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (likely(!cqe))
+ return;
+
+ /* by design, there's only a single cqe */
+ mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe);
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+}
+
+bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return false;
+
+ mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq);
+
+ if (mlx5_wq_ll_is_full(wq))
+ return false;
+
+ if (!rq->mpwqe.umr_in_progress)
+ mlx5e_alloc_rx_mpwqe(rq, wq->head);
+ else
+ rq->stats->congst_umr += mlx5_wq_ll_missing(wq) > 2;
+
+ return false;
+}
+
+static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
+{
+ u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+ (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
+
+ tcp->check = 0;
+ tcp->psh = get_cqe_lro_tcppsh(cqe);
+
+ if (tcp_ack) {
+ tcp->ack = 1;
+ tcp->ack_seq = cqe->lro_ack_seq_num;
+ tcp->window = cqe->lro_tcp_win;
+ }
+}
+
+static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt)
+{
+ struct ethhdr *eth = (struct ethhdr *)(skb->data);
+ struct tcphdr *tcp;
+ int network_depth = 0;
+ __wsum check;
+ __be16 proto;
+ u16 tot_len;
+ void *ip_p;
+
+ proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
+
+ tot_len = cqe_bcnt - network_depth;
+ ip_p = skb->data + network_depth;
+
+ if (proto == htons(ETH_P_IP)) {
+ struct iphdr *ipv4 = ip_p;
+
+ tcp = ip_p + sizeof(struct iphdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ ipv4->ttl = cqe->lro_min_ttl;
+ ipv4->tot_len = cpu_to_be16(tot_len);
+ ipv4->check = 0;
+ ipv4->check = ip_fast_csum((unsigned char *)ipv4,
+ ipv4->ihl);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
+ tot_len - sizeof(struct iphdr),
+ IPPROTO_TCP, check);
+ } else {
+ u16 payload_len = tot_len - sizeof(struct ipv6hdr);
+ struct ipv6hdr *ipv6 = ip_p;
+
+ tcp = ip_p + sizeof(struct ipv6hdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
+ ipv6->hop_limit = cqe->lro_min_ttl;
+ ipv6->payload_len = cpu_to_be16(payload_len);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
+ IPPROTO_TCP, check);
+ }
+}
+
+static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb)
+{
+ u8 cht = cqe->rss_hash_type;
+ int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
+ (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
+ PKT_HASH_TYPE_NONE;
+ skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
+}
+
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+ __be16 *proto)
+{
+ *proto = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = __vlan_get_protocol(skb, *proto, network_depth);
+
+ if (*proto == htons(ETH_P_IP))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+ if (*proto == htons(ETH_P_IPV6))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+ return false;
+}
+
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+ int network_depth = 0;
+ __be16 proto;
+ void *ip;
+ int rc;
+
+ if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+ return;
+
+ ip = skb->data + network_depth;
+ rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+ IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
+
+ rq->stats->ecn_mark += !!rc;
+}
+
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+{
+ void *ip_p = skb->data + network_depth;
+
+ return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+ ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
+#define MAX_PADDING 8
+
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+ struct mlx5e_rq_stats *stats)
+{
+ stats->csum_complete_tail_slow++;
+ skb->csum = csum_block_add(skb->csum,
+ skb_checksum(skb, offset, len, 0),
+ offset);
+}
+
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+ struct mlx5e_rq_stats *stats)
+{
+ u8 tail_padding[MAX_PADDING];
+ int len = skb->len - offset;
+ void *tail;
+
+ if (unlikely(len > MAX_PADDING)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ tail = skb_header_pointer(skb, offset, len, tail_padding);
+ if (unlikely(!tail)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ stats->csum_complete_tail++;
+ skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
+static void
+mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
+ struct mlx5e_rq_stats *stats)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip4;
+ int pkt_len;
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ ip4 = (struct iphdr *)(skb->data + network_depth);
+ pkt_len = network_depth + ntohs(ip4->tot_len);
+ break;
+ case htons(ETH_P_IPV6):
+ ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+ pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+ break;
+ default:
+ return;
+ }
+
+ if (likely(pkt_len >= skb->len))
+ return;
+
+ tail_padding_csum(skb, pkt_len, stats);
+}
+
+static inline void mlx5e_handle_csum(struct net_device *netdev,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_rq *rq,
+ struct sk_buff *skb,
+ bool lro)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ int network_depth = 0;
+ __be16 proto;
+
+ if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
+ goto csum_none;
+
+ if (lro) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ stats->csum_unnecessary++;
+ return;
+ }
+
+ /* True when explicitly set via priv flag, or XDP prog is loaded */
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+ goto csum_unnecessary;
+
+ /* CQE csum doesn't cover padding octets in short ethernet
+ * frames. And the pad field is appended prior to calculating
+ * and appending the FCS field.
+ *
+ * Detecting these padded frames requires to verify and parse
+ * IP headers, so we simply force all those small frames to be
+ * CHECKSUM_UNNECESSARY even if they are not padded.
+ */
+ if (short_frame(skb->len))
+ goto csum_unnecessary;
+
+ if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+ if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+ goto csum_unnecessary;
+
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+ if (network_depth > ETH_HLEN)
+ /* CQE csum is calculated from the IP header and does
+ * not cover VLAN headers (if present). This will add
+ * the checksum manually.
+ */
+ skb->csum = csum_partial(skb->data + ETH_HLEN,
+ network_depth - ETH_HLEN,
+ skb->csum);
+
+ mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
+ stats->csum_complete++;
+ return;
+ }
+
+csum_unnecessary:
+ if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
+ (cqe->hds_ip_ext & CQE_L4_OK))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (cqe_is_tunneled(cqe)) {
+ skb->csum_level = 1;
+ skb->encapsulation = 1;
+ stats->csum_unnecessary_inner++;
+ return;
+ }
+ stats->csum_unnecessary++;
+ return;
+ }
+csum_none:
+ skb->ip_summed = CHECKSUM_NONE;
+ stats->csum_none++;
+}
+
+#define MLX5E_CE_BIT_MASK 0x80
+
+static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct mlx5e_rq *rq,
+ struct sk_buff *skb)
+{
+ u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct net_device *netdev = rq->netdev;
+
+ skb->mac_len = ETH_HLEN;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ mlx5e_tls_handle_rx_skb(netdev, skb, &cqe_bcnt);
+#endif
+
+ if (lro_num_seg > 1) {
+ mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
+ skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
+ /* Subtract one since we already counted this as one
+ * "regular" packet in mlx5e_complete_rx_cqe()
+ */
+ stats->packets += lro_num_seg - 1;
+ stats->lro_packets++;
+ stats->lro_bytes += cqe_bcnt;
+ }
+
+ if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+ skb_hwtstamps(skb)->hwtstamp =
+ mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
+
+ skb_record_rx_queue(skb, rq->ix);
+
+ if (likely(netdev->features & NETIF_F_RXHASH))
+ mlx5e_skb_set_hash(cqe, skb);
+
+ if (cqe_has_vlan(cqe)) {
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ be16_to_cpu(cqe->vlan_info));
+ stats->removed_vlan_packets++;
+ }
+
+ skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
+
+ mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+ /* checking CE bit in cqe - MSB in ml_path field */
+ if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+ mlx5e_enable_ecn(rq, skb);
+
+ skb->protocol = eth_type_trans(skb, netdev);
+}
+
+static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->packets++;
+ stats->bytes += cqe_bcnt;
+ mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
+}
+
+static inline
+struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
+ u32 frag_size, u16 headroom,
+ u32 cqe_bcnt)
+{
+ struct sk_buff *skb = build_skb(va, frag_size);
+
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_reserve(skb, headroom);
+ skb_put(skb, cqe_bcnt);
+
+ return skb;
+}
+
+struct sk_buff *
+mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+{
+ struct mlx5e_dma_info *di = wi->di;
+ u16 rx_headroom = rq->buff.headroom;
+ struct sk_buff *skb;
+ void *va, *data;
+ bool consumed;
+ u32 frag_size;
+
+ va = page_address(di->page) + wi->offset;
+ data = va + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+
+ dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
+ frag_size, DMA_FROM_DEVICE);
+ prefetchw(va); /* xdp_frame data area */
+ prefetch(data);
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ return NULL;
+ }
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt);
+ rcu_read_unlock();
+ if (consumed)
+ return NULL; /* page/packet was consumed by XDP */
+
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(di->page);
+
+ return skb;
+}
+
+struct sk_buff *
+mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+{
+ struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+ struct mlx5e_wqe_frag_info *head_wi = wi;
+ u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt);
+ u16 frag_headlen = headlen;
+ u16 byte_cnt = cqe_bcnt - headlen;
+ struct sk_buff *skb;
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ return NULL;
+ }
+
+ /* XDP is not supported in this configuration, as incoming packets
+ * might spread among multiple pages.
+ */
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+
+ while (byte_cnt) {
+ u16 frag_consumed_bytes =
+ min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt);
+
+ mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen,
+ frag_consumed_bytes, frag_info->frag_stride);
+ byte_cnt -= frag_consumed_bytes;
+ frag_headlen = 0;
+ frag_info++;
+ wi++;
+ }
+
+ /* copy header */
+ mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset,
+ 0, headlen);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += headlen;
+ skb->len += headlen;
+
+ return skb;
+}
+
+void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ /* do not return page to cache,
+ * it will be returned on XDP_TX completion.
+ */
+ goto wq_cyc_pop;
+ }
+ goto free_wqe;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ napi_gro_receive(rq->cq.napi, skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct net_device *netdev = rq->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ /* do not return page to cache,
+ * it will be returned on XDP_TX completion.
+ */
+ goto wq_cyc_pop;
+ }
+ goto free_wqe;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (rep->vlan && skb_vlan_tag_present(skb))
+ skb_vlan_pop(skb);
+
+ napi_gro_receive(rq->cq.napi, skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+#endif
+
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx)
+{
+ u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u32 frag_offset = head_offset + headlen;
+ u32 byte_cnt = cqe_bcnt - headlen;
+ struct mlx5e_dma_info *head_di = di;
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+
+ if (unlikely(frag_offset >= PAGE_SIZE)) {
+ di++;
+ frag_offset -= PAGE_SIZE;
+ }
+
+ while (byte_cnt) {
+ u32 pg_consumed_bytes =
+ min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
+ unsigned int truesize =
+ ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+ mlx5e_add_skb_frag(rq, skb, di, frag_offset,
+ pg_consumed_bytes, truesize);
+ byte_cnt -= pg_consumed_bytes;
+ frag_offset = 0;
+ di++;
+ }
+ /* copy header */
+ mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di,
+ head_offset, headlen);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += headlen;
+ skb->len += headlen;
+
+ return skb;
+}
+
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx)
+{
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u16 rx_headroom = rq->buff.headroom;
+ u32 cqe_bcnt32 = cqe_bcnt;
+ struct sk_buff *skb;
+ void *va, *data;
+ u32 frag_size;
+ bool consumed;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ va = page_address(di->page) + head_offset;
+ data = va + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
+
+ dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
+ frag_size, DMA_FROM_DEVICE);
+ prefetchw(va); /* xdp_frame data area */
+ prefetch(data);
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32);
+ rcu_read_unlock();
+ if (consumed) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(di->page);
+
+ return skb;
+}
+
+void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
+ u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
+ u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
+ u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
+ u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ struct mlx5e_rx_wqe_ll *wqe;
+ struct mlx5_wq_ll *wq;
+ struct sk_buff *skb;
+ u16 cqe_bcnt;
+
+ wi->consumed_strides += cstrides;
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+
+ skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset,
+ page_idx);
+ if (!skb)
+ goto mpwrq_cqe_out;
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ napi_gro_receive(rq->cq.napi, skb);
+
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
+{
+ struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
+ struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+ struct mlx5_cqe64 *cqe;
+ int work_done = 0;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return 0;
+
+ if (cq->decmprs_left) {
+ work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
+ if (cq->decmprs_left || work_done >= budget)
+ goto out;
+ }
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe) {
+ if (unlikely(work_done))
+ goto out;
+ return 0;
+ }
+
+ do {
+ if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
+ work_done +=
+ mlx5e_decompress_cqes_start(rq, cq,
+ budget - work_done);
+ continue;
+ }
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ rq->handle_rx_cqe(rq, cqe);
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+out:
+ if (xdpsq->doorbell) {
+ mlx5e_xmit_xdp_doorbell(xdpsq);
+ xdpsq->doorbell = false;
+ }
+
+ if (xdpsq->redirect_flush) {
+ xdp_do_flush_map();
+ xdpsq->redirect_flush = false;
+ }
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ return work_done;
+}
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+
+#define MLX5_IB_GRH_SGID_OFFSET 8
+#define MLX5_IB_GRH_DGID_OFFSET 24
+#define MLX5_GID_SIZE 16
+
+static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct hwtstamp_config *tstamp;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ char *pseudo_header;
+ u32 flags_rqpn;
+ u32 qpn;
+ u8 *dgid;
+ u8 g;
+
+ qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
+ netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);
+
+ /* No mapping present, cannot process SKB. This might happen if a child
+ * interface is going down while having unprocessed CQEs on parent RQ
+ */
+ if (unlikely(!netdev)) {
+ /* TODO: add drop counters support */
+ skb->dev = NULL;
+ pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
+ return;
+ }
+
+ priv = mlx5i_epriv(netdev);
+ tstamp = &priv->tstamp;
+
+ flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
+ g = (flags_rqpn >> 28) & 3;
+ dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
+ if ((!g) || dgid[0] != 0xff)
+ skb->pkt_type = PACKET_HOST;
+ else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0)
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+
+ /* Drop packets that this interface sent, ie multicast packets
+ * that the HCA has replicated.
+ */
+ if (g && (qpn == (flags_rqpn & 0xffffff)) &&
+ (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
+ MLX5_GID_SIZE) == 0)) {
+ skb->dev = NULL;
+ return;
+ }
+
+ skb_pull(skb, MLX5_IB_GRH_BYTES);
+
+ skb->protocol = *((__be16 *)(skb->data));
+
+ if (netdev->features & NETIF_F_RXCSUM) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+ stats->csum_complete++;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ stats->csum_none++;
+ }
+
+ if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
+ skb_hwtstamps(skb)->hwtstamp =
+ mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
+
+ skb_record_rx_queue(skb, rq->ix);
+
+ if (likely(netdev->features & NETIF_F_RXHASH))
+ mlx5e_skb_set_hash(cqe, skb);
+
+ /* 20 bytes of ipoib header and 4 for encap existing */
+ pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
+ memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
+ skb_reset_mac_header(skb);
+ skb_pull(skb, MLX5_IPOIB_HARD_LEN);
+
+ skb->dev = netdev;
+
+ stats->packets++;
+ stats->bytes += cqe_bcnt;
+}
+
+void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb)
+ goto wq_free_wqe;
+
+ mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ if (unlikely(!skb->dev)) {
+ dev_kfree_skb_any(skb);
+ goto wq_free_wqe;
+ }
+ napi_gro_receive(rq->cq.napi, skb);
+
+wq_free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+ mlx5_wq_cyc_pop(wq);
+}
+
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (unlikely(!skb)) {
+ /* a DROP, save the page-reuse checks */
+ mlx5e_free_rx_wqe(rq, wi, true);
+ goto wq_cyc_pop;
+ }
+ skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt);
+ if (unlikely(!skb)) {
+ mlx5e_free_rx_wqe(rq, wi, true);
+ goto wq_cyc_pop;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ napi_gro_receive(rq->cq.napi, skb);
+
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
new file mode 100644
index 000000000..5fb088b54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/prefetch.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/udp.h>
+#include "en.h"
+#include "en/port.h"
+
+enum {
+ MLX5E_ST_LINK_STATE,
+ MLX5E_ST_LINK_SPEED,
+ MLX5E_ST_HEALTH_INFO,
+#ifdef CONFIG_INET
+ MLX5E_ST_LOOPBACK,
+#endif
+ MLX5E_ST_NUM,
+};
+
+const char mlx5e_self_tests[MLX5E_ST_NUM][ETH_GSTRING_LEN] = {
+ "Link Test",
+ "Speed Test",
+ "Health Test",
+#ifdef CONFIG_INET
+ "Loopback Test",
+#endif
+};
+
+int mlx5e_self_test_num(struct mlx5e_priv *priv)
+{
+ return ARRAY_SIZE(mlx5e_self_tests);
+}
+
+static int mlx5e_test_health_info(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_health *health = &priv->mdev->priv.health;
+
+ return health->sick ? 1 : 0;
+}
+
+static int mlx5e_test_link_state(struct mlx5e_priv *priv)
+{
+ u8 port_state;
+
+ if (!netif_carrier_ok(priv->netdev))
+ return 1;
+
+ port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0);
+ return port_state == VPORT_STATE_UP ? 0 : 1;
+}
+
+static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
+{
+ u32 speed;
+
+ if (!netif_carrier_ok(priv->netdev))
+ return 1;
+
+ return mlx5e_port_linkspeed(priv->mdev, &speed);
+}
+
+struct mlx5ehdr {
+ __be32 version;
+ __be64 magic;
+};
+
+#ifdef CONFIG_INET
+/* loopback test */
+#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\
+ sizeof(struct udphdr) + sizeof(struct mlx5ehdr))
+#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL
+
+static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
+{
+ struct sk_buff *skb = NULL;
+ struct mlx5ehdr *mlxh;
+ struct ethhdr *ethh;
+ struct udphdr *udph;
+ struct iphdr *iph;
+ int iplen;
+
+ skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE);
+ if (!skb) {
+ netdev_err(priv->netdev, "\tFailed to alloc loopback skb\n");
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+ skb_reserve(skb, NET_IP_ALIGN);
+
+ /* Reserve for ethernet and IP header */
+ ethh = skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+
+ skb_set_network_header(skb, skb->len);
+ iph = skb_put(skb, sizeof(struct iphdr));
+
+ skb_set_transport_header(skb, skb->len);
+ udph = skb_put(skb, sizeof(struct udphdr));
+
+ /* Fill ETH header */
+ ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr);
+ eth_zero_addr(ethh->h_source);
+ ethh->h_proto = htons(ETH_P_IP);
+
+ /* Fill UDP header */
+ udph->source = htons(9);
+ udph->dest = htons(9); /* Discard Protocol */
+ udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr));
+ udph->check = 0;
+
+ /* Fill IP header */
+ iph->ihl = 5;
+ iph->ttl = 32;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iplen = sizeof(struct iphdr) + sizeof(struct udphdr) +
+ sizeof(struct mlx5ehdr);
+ iph->tot_len = htons(iplen);
+ iph->frag_off = 0;
+ iph->saddr = 0;
+ iph->daddr = 0;
+ iph->tos = 0;
+ iph->id = 0;
+ ip_send_check(iph);
+
+ /* Fill test header and data */
+ mlxh = skb_put(skb, sizeof(*mlxh));
+ mlxh->version = 0;
+ mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC);
+
+ skb->csum = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ udp4_hwcsum(skb, iph->saddr, iph->daddr);
+
+ skb->protocol = htons(ETH_P_IP);
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = priv->netdev;
+
+ return skb;
+}
+
+struct mlx5e_lbt_priv {
+ struct packet_type pt;
+ struct completion comp;
+ bool loopback_ok;
+ bool local_lb;
+};
+
+static int
+mlx5e_test_loopback_validate(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt,
+ struct net_device *orig_ndev)
+{
+ struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv;
+ struct mlx5ehdr *mlxh;
+ struct ethhdr *ethh;
+ struct udphdr *udph;
+ struct iphdr *iph;
+
+ /* We are only going to peek, no need to clone the SKB */
+ if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
+ goto out;
+
+ ethh = (struct ethhdr *)skb_mac_header(skb);
+ if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr))
+ goto out;
+
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_UDP)
+ goto out;
+
+ /* Don't assume skb_transport_header() was set */
+ udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
+ if (udph->dest != htons(9))
+ goto out;
+
+ mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph));
+ if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC))
+ goto out; /* so close ! */
+
+ /* bingo */
+ lbtp->loopback_ok = true;
+ complete(&lbtp->comp);
+out:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
+ struct mlx5e_lbt_priv *lbtp)
+{
+ int err = 0;
+
+ /* Temporarily enable local_lb */
+ err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb);
+ if (err)
+ return err;
+
+ if (!lbtp->local_lb) {
+ err = mlx5_nic_vport_update_local_lb(priv->mdev, true);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_refresh_tirs(priv, true);
+ if (err)
+ goto out;
+
+ lbtp->loopback_ok = false;
+ init_completion(&lbtp->comp);
+
+ lbtp->pt.type = htons(ETH_P_IP);
+ lbtp->pt.func = mlx5e_test_loopback_validate;
+ lbtp->pt.dev = priv->netdev;
+ lbtp->pt.af_packet_priv = lbtp;
+ dev_add_pack(&lbtp->pt);
+
+ return 0;
+
+out:
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, false);
+
+ return err;
+}
+
+static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
+ struct mlx5e_lbt_priv *lbtp)
+{
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, false);
+
+ dev_remove_pack(&lbtp->pt);
+ mlx5e_refresh_tirs(priv, false);
+}
+
+#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
+static int mlx5e_test_loopback(struct mlx5e_priv *priv)
+{
+ struct mlx5e_lbt_priv *lbtp;
+ struct sk_buff *skb = NULL;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ netdev_err(priv->netdev,
+ "\tCan't perform loopback test while device is down\n");
+ return -ENODEV;
+ }
+
+ lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL);
+ if (!lbtp)
+ return -ENOMEM;
+ lbtp->loopback_ok = false;
+
+ err = mlx5e_test_loopback_setup(priv, lbtp);
+ if (err)
+ goto out;
+
+ skb = mlx5e_test_get_udp_skb(priv);
+ if (!skb) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ skb_set_queue_mapping(skb, 0);
+ err = dev_queue_xmit(skb);
+ if (err) {
+ netdev_err(priv->netdev,
+ "\tFailed to xmit loopback packet err(%d)\n",
+ err);
+ goto cleanup;
+ }
+
+ wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT);
+ err = !lbtp->loopback_ok;
+
+cleanup:
+ mlx5e_test_loopback_cleanup(priv, lbtp);
+out:
+ kfree(lbtp);
+ return err;
+}
+#endif
+
+static int (*mlx5e_st_func[MLX5E_ST_NUM])(struct mlx5e_priv *) = {
+ mlx5e_test_link_state,
+ mlx5e_test_link_speed,
+ mlx5e_test_health_info,
+#ifdef CONFIG_INET
+ mlx5e_test_loopback,
+#endif
+};
+
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf)
+{
+ struct mlx5e_priv *priv = netdev_priv(ndev);
+ int i;
+
+ memset(buf, 0, sizeof(u64) * MLX5E_ST_NUM);
+
+ mutex_lock(&priv->state_lock);
+ netdev_info(ndev, "Self test begin..\n");
+
+ for (i = 0; i < MLX5E_ST_NUM; i++) {
+ netdev_info(ndev, "\t[%d] %s start..\n",
+ i, mlx5e_self_tests[i]);
+ buf[i] = mlx5e_st_func[i](priv);
+ netdev_info(ndev, "\t[%d] %s end: result(%lld)\n",
+ i, mlx5e_self_tests[i], buf[i]);
+ }
+
+ mutex_unlock(&priv->state_lock);
+
+ for (i = 0; i < MLX5E_ST_NUM; i++) {
+ if (buf[i]) {
+ etest->flags |= ETH_TEST_FL_FAILED;
+ break;
+ }
+ }
+ netdev_info(ndev, "Self test out: status flags(0x%x)\n",
+ etest->flags);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
new file mode 100644
index 000000000..9a68dee58
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -0,0 +1,1404 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/tls.h"
+
+static const struct counter_desc sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+#endif
+
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
+};
+
+#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc)
+
+static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_SW_COUNTERS;
+}
+
+static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i);
+ return idx;
+}
+
+void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_sw_stats temp, *s = &temp;
+ int i;
+
+ memset(s, 0, sizeof(*s));
+
+ for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) {
+ struct mlx5e_channel_stats *channel_stats =
+ &priv->channel_stats[i];
+ struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
+ struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
+ struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
+ struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
+ int j;
+
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+ s->rx_lro_packets += rq_stats->lro_packets;
+ s->rx_lro_bytes += rq_stats->lro_bytes;
+ s->rx_ecn_mark += rq_stats->ecn_mark;
+ s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
+ s->rx_csum_none += rq_stats->csum_none;
+ s->rx_csum_complete += rq_stats->csum_complete;
+ s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
+ s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
+ s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+ s->rx_xdp_drop += rq_stats->xdp_drop;
+ s->rx_xdp_redirect += rq_stats->xdp_redirect;
+ s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
+ s->rx_xdp_tx_full += xdpsq_stats->full;
+ s->rx_xdp_tx_err += xdpsq_stats->err;
+ s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
+ s->rx_wqe_err += rq_stats->wqe_err;
+ s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes;
+ s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
+ s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop;
+ s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
+ s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+ s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+ s->rx_page_reuse += rq_stats->page_reuse;
+ s->rx_cache_reuse += rq_stats->cache_reuse;
+ s->rx_cache_full += rq_stats->cache_full;
+ s->rx_cache_empty += rq_stats->cache_empty;
+ s->rx_cache_busy += rq_stats->cache_busy;
+ s->rx_cache_waive += rq_stats->cache_waive;
+ s->rx_congst_umr += rq_stats->congst_umr;
+ s->ch_events += ch_stats->events;
+ s->ch_poll += ch_stats->poll;
+ s->ch_arm += ch_stats->arm;
+ s->ch_aff_change += ch_stats->aff_change;
+ s->ch_eq_rearm += ch_stats->eq_rearm;
+ /* xdp redirect */
+ s->tx_xdp_xmit += xdpsq_red_stats->xmit;
+ s->tx_xdp_full += xdpsq_red_stats->full;
+ s->tx_xdp_err += xdpsq_red_stats->err;
+ s->tx_xdp_cqes += xdpsq_red_stats->cqes;
+
+ for (j = 0; j < priv->max_opened_tc; j++) {
+ struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_tso_packets += sq_stats->tso_packets;
+ s->tx_tso_bytes += sq_stats->tso_bytes;
+ s->tx_tso_inner_packets += sq_stats->tso_inner_packets;
+ s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
+ s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
+ s->tx_nop += sq_stats->nop;
+ s->tx_queue_stopped += sq_stats->stopped;
+ s->tx_queue_wake += sq_stats->wake;
+ s->tx_queue_dropped += sq_stats->dropped;
+ s->tx_cqe_err += sq_stats->cqe_err;
+ s->tx_recover += sq_stats->recover;
+ s->tx_xmit_more += sq_stats->xmit_more;
+ s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+ s->tx_csum_none += sq_stats->csum_none;
+ s->tx_csum_partial += sq_stats->csum_partial;
+#ifdef CONFIG_MLX5_EN_TLS
+ s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+#endif
+ s->tx_cqes += sq_stats->cqes;
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+ }
+
+ memcpy(&priv->stats.sw, s, sizeof(*s));
+}
+
+static const struct counter_desc q_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
+};
+
+static const struct counter_desc drop_rq_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) },
+};
+
+#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc)
+#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc)
+
+static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv)
+{
+ int num_stats = 0;
+
+ if (priv->q_counter)
+ num_stats += NUM_Q_COUNTERS;
+
+ if (priv->drop_rq_q_counter)
+ num_stats += NUM_DROP_RQ_COUNTERS;
+
+ return num_stats;
+}
+
+static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ q_stats_desc[i].format);
+
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ drop_rq_stats_desc[i].format);
+
+ return idx;
+}
+
+static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ q_stats_desc, i);
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ drop_rq_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)];
+
+ if (priv->q_counter &&
+ !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out,
+ sizeof(out)))
+ qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ if (priv->drop_rq_q_counter &&
+ !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0,
+ out, sizeof(out)))
+ qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out,
+ out_of_buffer);
+}
+
+#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
+static const struct counter_desc vnic_env_stats_desc[] = {
+ { "rx_steer_missed_packets",
+ VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) },
+};
+
+#define NUM_VNIC_ENV_COUNTERS ARRAY_SIZE(vnic_env_stats_desc)
+
+static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ?
+ NUM_VNIC_ENV_COUNTERS : 0;
+}
+
+static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return idx;
+
+ for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return idx;
+
+ for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv)
+{
+ u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out;
+ int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out);
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return;
+
+ MLX5_SET(query_vnic_env_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+ MLX5_SET(query_vnic_env_in, in, other_vport, 0);
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
+static const struct counter_desc vport_stats_desc[] = {
+ { "rx_vport_unicast_packets",
+ VPORT_COUNTER_OFF(received_eth_unicast.packets) },
+ { "rx_vport_unicast_bytes",
+ VPORT_COUNTER_OFF(received_eth_unicast.octets) },
+ { "tx_vport_unicast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) },
+ { "tx_vport_unicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) },
+ { "rx_vport_multicast_packets",
+ VPORT_COUNTER_OFF(received_eth_multicast.packets) },
+ { "rx_vport_multicast_bytes",
+ VPORT_COUNTER_OFF(received_eth_multicast.octets) },
+ { "tx_vport_multicast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) },
+ { "tx_vport_multicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) },
+ { "rx_vport_broadcast_packets",
+ VPORT_COUNTER_OFF(received_eth_broadcast.packets) },
+ { "rx_vport_broadcast_bytes",
+ VPORT_COUNTER_OFF(received_eth_broadcast.octets) },
+ { "tx_vport_broadcast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
+ { "tx_vport_broadcast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
+ { "rx_vport_rdma_unicast_packets",
+ VPORT_COUNTER_OFF(received_ib_unicast.packets) },
+ { "rx_vport_rdma_unicast_bytes",
+ VPORT_COUNTER_OFF(received_ib_unicast.octets) },
+ { "tx_vport_rdma_unicast_packets",
+ VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) },
+ { "tx_vport_rdma_unicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) },
+ { "rx_vport_rdma_multicast_packets",
+ VPORT_COUNTER_OFF(received_ib_multicast.packets) },
+ { "rx_vport_rdma_multicast_bytes",
+ VPORT_COUNTER_OFF(received_ib_multicast.octets) },
+ { "tx_vport_rdma_multicast_packets",
+ VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) },
+ { "tx_vport_rdma_multicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) },
+};
+
+#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc)
+
+static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_VPORT_COUNTERS;
+}
+
+static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out,
+ vport_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_vport_update_stats(struct mlx5e_priv *priv)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u32 *out = (u32 *)priv->stats.vport.query_vport_out;
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, other_vport, 0);
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+#define PPORT_802_3_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_802_3_stats_desc[] = {
+ { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+ { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
+ { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+ { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+ { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
+ { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+ { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+ { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+ { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+ { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
+ { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
+ { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
+ { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+ { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+ { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
+ { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+ { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+ { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+};
+
+#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc)
+
+static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PPORT_802_3_COUNTERS;
+}
+
+static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters,
+ pport_802_3_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->IEEE_802_3_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_2863_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2863_stats_desc[] = {
+ { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
+ { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
+ { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
+};
+
+#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc)
+
+static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PPORT_2863_COUNTERS;
+}
+
+static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters,
+ pport_2863_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_2863_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->RFC_2863_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_2819_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2819_stats_desc[] = {
+ { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+ { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
+ { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
+ { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
+ { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+ { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+ { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+ { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+ { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+ { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+ { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+ { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+ { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+};
+
+#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc)
+
+static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PPORT_2819_COUNTERS;
+}
+
+static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
+ pport_2819_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->RFC_2819_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_PHY_STATISTICAL_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.phys_layer_statistical_cntrs.c##_high)
+static const struct counter_desc pport_phy_statistical_stats_desc[] = {
+ { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) },
+ { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
+};
+
+#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc)
+
+static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv)
+{
+ /* "1" for link_down_events special counter */
+ return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ?
+ NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1;
+}
+
+static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy");
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ return idx;
+
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_phy_statistical_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ int i;
+
+ /* link_down_events_phy has special handling since it is not stored in __be64 format */
+ data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters,
+ counter_set.phys_layer_cntrs.link_down_events);
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ return idx;
+
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+ pport_phy_statistical_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_phy_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->phy_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
+ return;
+
+ out = pstats->phy_statistical_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_ETH_EXT_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_eth_ext_stats_desc[] = {
+ { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) },
+};
+
+#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc)
+
+static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv)
+{
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ return NUM_PPORT_ETH_EXT_COUNTERS;
+
+ return 0;
+}
+
+static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_eth_ext_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters,
+ pport_eth_ext_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_eth_ext_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->eth_ext_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PCIE_PERF_OFF(c) \
+ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
+static const struct counter_desc pcie_perf_stats_desc[] = {
+ { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
+ { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
+};
+
+#define PCIE_PERF_OFF64(c) \
+ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pcie_perf_stats_desc64[] = {
+ { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) },
+};
+
+static const struct counter_desc pcie_perf_stall_stats_desc[] = {
+ { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) },
+ { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) },
+ { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) },
+ { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) },
+};
+
+#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc)
+#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64)
+#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc)
+
+static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv)
+{
+ int num_stats = 0;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ num_stats += NUM_PCIE_PERF_COUNTERS;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ num_stats += NUM_PCIE_PERF_COUNTERS64;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ num_stats += NUM_PCIE_PERF_STALL_COUNTERS;
+
+ return num_stats;
+}
+
+static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stats_desc[i].format);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stats_desc64[i].format);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stall_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stats_desc, i);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stats_desc64, i);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stall_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
+ void *out;
+
+ if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group))
+ return;
+
+ out = pcie_stats->pcie_perf_counters;
+ MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
+}
+
+#define PPORT_PER_PRIO_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_per_prio_grp_data_layout.c##_high)
+static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
+ { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
+ { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
+ { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
+ { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
+};
+
+#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
+
+static int mlx5e_grp_per_prio_traffic_get_num_stats(void)
+{
+ return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv,
+ u8 *data,
+ int idx)
+{
+ int i, prio;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_traffic_stats_desc[i].format, prio);
+ }
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv,
+ u64 *data,
+ int idx)
+{
+ int i, prio;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+ pport_per_prio_traffic_stats_desc, i);
+ }
+
+ return idx;
+}
+
+static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
+ /* %s is "global" or "prio{i}" */
+ { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+ { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+ { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+ { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+ { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
+};
+
+static const struct counter_desc pport_pfc_stall_stats_desc[] = {
+ { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+ { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
+};
+
+#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
+#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \
+ MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \
+ MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+
+static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 pfc_en_tx;
+ u8 pfc_en_rx;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return 0;
+
+ err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx);
+
+ return err ? 0 : pfc_en_tx | pfc_en_rx;
+}
+
+static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 rx_pause;
+ u32 tx_pause;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return false;
+
+ err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+ return err ? false : rx_pause | tx_pause;
+}
+
+static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv)
+{
+ return (mlx5e_query_global_pause_combined(priv) +
+ hweight8(mlx5e_query_pfc_combined(priv))) *
+ NUM_PPORT_PER_PRIO_PFC_COUNTERS +
+ NUM_PPORT_PFC_STALL_COUNTERS(priv);
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
+ u8 *data,
+ int idx)
+{
+ unsigned long pfc_combined;
+ int i, prio;
+
+ pfc_combined = mlx5e_query_pfc_combined(priv);
+ for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ char pfc_string[ETH_GSTRING_LEN];
+
+ snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio);
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_pfc_stats_desc[i].format, pfc_string);
+ }
+ }
+
+ if (mlx5e_query_global_pause_combined(priv)) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_pfc_stats_desc[i].format, "global");
+ }
+ }
+
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_pfc_stall_stats_desc[i].format);
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv,
+ u64 *data,
+ int idx)
+{
+ unsigned long pfc_combined;
+ int i, prio;
+
+ pfc_combined = mlx5e_query_pfc_combined(priv);
+ for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+ pport_per_prio_pfc_stats_desc, i);
+ }
+ }
+
+ if (mlx5e_query_global_pause_combined(priv)) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ pport_per_prio_pfc_stats_desc, i);
+ }
+ }
+
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ pport_pfc_stall_stats_desc, i);
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_get_num_stats(struct mlx5e_priv *priv)
+{
+ return mlx5e_grp_per_prio_traffic_get_num_stats() +
+ mlx5e_grp_per_prio_pfc_get_num_stats(priv);
+}
+
+static int mlx5e_grp_per_prio_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx);
+ idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx);
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx);
+ idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx);
+ return idx;
+}
+
+static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int prio;
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ out = pstats->per_prio_counters[prio];
+ MLX5_SET(ppcnt_reg, in, prio_tc, prio);
+ mlx5_core_access_reg(mdev, in, sz, out, sz,
+ MLX5_REG_PPCNT, 0, 0);
+ }
+}
+
+static const struct counter_desc mlx5e_pme_status_desc[] = {
+ { "module_unplug", 8 },
+};
+
+static const struct counter_desc mlx5e_pme_error_desc[] = {
+ { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
+ { "module_high_temp", 48 }, /* high temperature */
+ { "module_bad_shorted", 56 }, /* bad or shorted cable/module */
+};
+
+#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc)
+#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc)
+
+static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS;
+}
+
+static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format);
+
+ for (i = 0; i < NUM_PME_ERR_STATS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
+
+ return idx;
+}
+
+static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+ int i;
+
+ for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+ mlx5e_pme_status_desc, i);
+
+ for (i = 0; i < NUM_PME_ERR_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+ mlx5e_pme_error_desc, i);
+
+ return idx;
+}
+
+static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv)
+{
+ return mlx5e_ipsec_get_count(priv);
+}
+
+static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ return idx + mlx5e_ipsec_get_strings(priv,
+ data + idx * ETH_GSTRING_LEN);
+}
+
+static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ return idx + mlx5e_ipsec_get_stats(priv, data + idx);
+}
+
+static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+ mlx5e_ipsec_update_stats(priv);
+}
+
+static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv)
+{
+ return mlx5e_tls_get_count(priv);
+}
+
+static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+}
+
+static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ return idx + mlx5e_tls_get_stats(priv, data + idx);
+}
+
+static const struct counter_desc rq_stats_desc[] = {
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
+};
+
+static const struct counter_desc sq_stats_desc[] = {
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
+static const struct counter_desc rq_xdpsq_stats_desc[] = {
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc xdpsq_stats_desc[] = {
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc ch_stats_desc[] = {
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
+};
+
+#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc)
+#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
+#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
+#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc)
+#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc)
+
+static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+
+ return (NUM_RQ_STATS * max_nch) +
+ (NUM_CH_STATS * max_nch) +
+ (NUM_SQ_STATS * max_nch * priv->max_opened_tc) +
+ (NUM_RQ_XDPSQ_STATS * max_nch) +
+ (NUM_XDPSQ_STATS * max_nch);
+}
+
+static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+ int i, j, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_CH_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ch_stats_desc[j].format, i);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_RQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ rq_stats_desc[j].format, i);
+ for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ rq_xdpsq_stats_desc[j].format, i);
+ }
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_SQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ sq_stats_desc[j].format,
+ priv->channel_tc2txq[i][tc]);
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_XDPSQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xdpsq_stats_desc[j].format, i);
+
+ return idx;
+}
+
+static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+ int i, j, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_CH_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch,
+ ch_stats_desc, j);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_RQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
+ rq_stats_desc, j);
+ for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
+ rq_xdpsq_stats_desc, j);
+ }
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_SQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
+ sq_stats_desc, j);
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_XDPSQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
+ xdpsq_stats_desc, j);
+
+ return idx;
+}
+
+/* The stats groups order is opposite to the update_stats() order calls */
+const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
+ {
+ .get_num_stats = mlx5e_grp_sw_get_num_stats,
+ .fill_strings = mlx5e_grp_sw_fill_strings,
+ .fill_stats = mlx5e_grp_sw_fill_stats,
+ .update_stats = mlx5e_grp_sw_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_q_get_num_stats,
+ .fill_strings = mlx5e_grp_q_fill_strings,
+ .fill_stats = mlx5e_grp_q_fill_stats,
+ .update_stats_mask = MLX5E_NDO_UPDATE_STATS,
+ .update_stats = mlx5e_grp_q_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_vnic_env_get_num_stats,
+ .fill_strings = mlx5e_grp_vnic_env_fill_strings,
+ .fill_stats = mlx5e_grp_vnic_env_fill_stats,
+ .update_stats = mlx5e_grp_vnic_env_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_vport_get_num_stats,
+ .fill_strings = mlx5e_grp_vport_fill_strings,
+ .fill_stats = mlx5e_grp_vport_fill_stats,
+ .update_stats_mask = MLX5E_NDO_UPDATE_STATS,
+ .update_stats = mlx5e_grp_vport_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_802_3_get_num_stats,
+ .fill_strings = mlx5e_grp_802_3_fill_strings,
+ .fill_stats = mlx5e_grp_802_3_fill_stats,
+ .update_stats_mask = MLX5E_NDO_UPDATE_STATS,
+ .update_stats = mlx5e_grp_802_3_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_2863_get_num_stats,
+ .fill_strings = mlx5e_grp_2863_fill_strings,
+ .fill_stats = mlx5e_grp_2863_fill_stats,
+ .update_stats = mlx5e_grp_2863_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_2819_get_num_stats,
+ .fill_strings = mlx5e_grp_2819_fill_strings,
+ .fill_stats = mlx5e_grp_2819_fill_stats,
+ .update_stats = mlx5e_grp_2819_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_phy_get_num_stats,
+ .fill_strings = mlx5e_grp_phy_fill_strings,
+ .fill_stats = mlx5e_grp_phy_fill_stats,
+ .update_stats = mlx5e_grp_phy_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_eth_ext_get_num_stats,
+ .fill_strings = mlx5e_grp_eth_ext_fill_strings,
+ .fill_stats = mlx5e_grp_eth_ext_fill_stats,
+ .update_stats = mlx5e_grp_eth_ext_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_pcie_get_num_stats,
+ .fill_strings = mlx5e_grp_pcie_fill_strings,
+ .fill_stats = mlx5e_grp_pcie_fill_stats,
+ .update_stats = mlx5e_grp_pcie_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_per_prio_get_num_stats,
+ .fill_strings = mlx5e_grp_per_prio_fill_strings,
+ .fill_stats = mlx5e_grp_per_prio_fill_stats,
+ .update_stats = mlx5e_grp_per_prio_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_pme_get_num_stats,
+ .fill_strings = mlx5e_grp_pme_fill_strings,
+ .fill_stats = mlx5e_grp_pme_fill_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_ipsec_get_num_stats,
+ .fill_strings = mlx5e_grp_ipsec_fill_strings,
+ .fill_stats = mlx5e_grp_ipsec_fill_stats,
+ .update_stats = mlx5e_grp_ipsec_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_tls_get_num_stats,
+ .fill_strings = mlx5e_grp_tls_fill_strings,
+ .fill_stats = mlx5e_grp_tls_fill_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_channels_get_num_stats,
+ .fill_strings = mlx5e_grp_channels_fill_strings,
+ .fill_stats = mlx5e_grp_channels_fill_stats,
+ }
+};
+
+const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
new file mode 100644
index 000000000..3ea8033ed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_STATS_H__
+#define __MLX5_EN_STATS_H__
+
+#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \
+ (*(u64 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \
+ be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \
+ (*(u32 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
+ be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
+
+#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
+
+struct counter_desc {
+ char format[ETH_GSTRING_LEN];
+ size_t offset; /* Byte offset */
+};
+
+struct mlx5e_sw_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_tso_packets;
+ u64 tx_tso_bytes;
+ u64 tx_tso_inner_packets;
+ u64 tx_tso_inner_bytes;
+ u64 tx_added_vlan_packets;
+ u64 tx_nop;
+ u64 rx_lro_packets;
+ u64 rx_lro_bytes;
+ u64 rx_ecn_mark;
+ u64 rx_removed_vlan_packets;
+ u64 rx_csum_unnecessary;
+ u64 rx_csum_none;
+ u64 rx_csum_complete;
+ u64 rx_csum_complete_tail;
+ u64 rx_csum_complete_tail_slow;
+ u64 rx_csum_unnecessary_inner;
+ u64 rx_xdp_drop;
+ u64 rx_xdp_redirect;
+ u64 rx_xdp_tx_xmit;
+ u64 rx_xdp_tx_full;
+ u64 rx_xdp_tx_err;
+ u64 rx_xdp_tx_cqe;
+ u64 tx_csum_none;
+ u64 tx_csum_partial;
+ u64 tx_csum_partial_inner;
+ u64 tx_queue_stopped;
+ u64 tx_queue_dropped;
+ u64 tx_xmit_more;
+ u64 tx_recover;
+ u64 tx_cqes;
+ u64 tx_queue_wake;
+ u64 tx_cqe_err;
+ u64 tx_xdp_xmit;
+ u64 tx_xdp_full;
+ u64 tx_xdp_err;
+ u64 tx_xdp_cqes;
+ u64 rx_wqe_err;
+ u64 rx_mpwqe_filler_cqes;
+ u64 rx_mpwqe_filler_strides;
+ u64 rx_oversize_pkts_sw_drop;
+ u64 rx_buff_alloc_err;
+ u64 rx_cqe_compress_blks;
+ u64 rx_cqe_compress_pkts;
+ u64 rx_page_reuse;
+ u64 rx_cache_reuse;
+ u64 rx_cache_full;
+ u64 rx_cache_empty;
+ u64 rx_cache_busy;
+ u64 rx_cache_waive;
+ u64 rx_congst_umr;
+ u64 ch_events;
+ u64 ch_poll;
+ u64 ch_arm;
+ u64 ch_aff_change;
+ u64 ch_eq_rearm;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tx_tls_ooo;
+ u64 tx_tls_resync_bytes;
+#endif
+};
+
+struct mlx5e_qcounter_stats {
+ u32 rx_out_of_buffer;
+ u32 rx_if_down_packets;
+};
+
+struct mlx5e_vnic_env_stats {
+ __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
+};
+
+#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \
+ vstats->query_vport_out, c)
+
+struct mlx5e_vport_stats {
+ __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)];
+};
+
+#define PPORT_802_3_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \
+ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+#define PPORT_2863_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \
+ counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+#define PPORT_2819_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \
+ counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+#define PPORT_PHY_STATISTICAL_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \
+ counter_set.phys_layer_statistical_cntrs.c##_high)
+#define PPORT_PER_PRIO_GET(pstats, prio, c) \
+ MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \
+ counter_set.eth_per_prio_grp_data_layout.c##_high)
+#define NUM_PPORT_PRIO 8
+#define PPORT_ETH_EXT_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+
+struct mlx5e_pport_stats {
+ __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+};
+
+#define PCIE_PERF_GET(pcie_stats, c) \
+ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
+ counter_set.pcie_perf_cntrs_grp_data_layout.c)
+
+#define PCIE_PERF_GET64(pcie_stats, c) \
+ MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
+ counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+
+struct mlx5e_pcie_stats {
+ __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
+};
+
+struct mlx5e_rq_stats {
+ u64 packets;
+ u64 bytes;
+ u64 csum_complete;
+ u64 csum_complete_tail;
+ u64 csum_complete_tail_slow;
+ u64 csum_unnecessary;
+ u64 csum_unnecessary_inner;
+ u64 csum_none;
+ u64 lro_packets;
+ u64 lro_bytes;
+ u64 ecn_mark;
+ u64 removed_vlan_packets;
+ u64 xdp_drop;
+ u64 xdp_redirect;
+ u64 wqe_err;
+ u64 mpwqe_filler_cqes;
+ u64 mpwqe_filler_strides;
+ u64 oversize_pkts_sw_drop;
+ u64 buff_alloc_err;
+ u64 cqe_compress_blks;
+ u64 cqe_compress_pkts;
+ u64 page_reuse;
+ u64 cache_reuse;
+ u64 cache_full;
+ u64 cache_empty;
+ u64 cache_busy;
+ u64 cache_waive;
+ u64 congst_umr;
+};
+
+struct mlx5e_sq_stats {
+ /* commonly accessed in data path */
+ u64 packets;
+ u64 bytes;
+ u64 xmit_more;
+ u64 tso_packets;
+ u64 tso_bytes;
+ u64 tso_inner_packets;
+ u64 tso_inner_bytes;
+ u64 csum_partial;
+ u64 csum_partial_inner;
+ u64 added_vlan_packets;
+ u64 nop;
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_ooo;
+ u64 tls_resync_bytes;
+#endif
+ /* less likely accessed in data path */
+ u64 csum_none;
+ u64 stopped;
+ u64 dropped;
+ u64 recover;
+ /* dirtied @completion */
+ u64 cqes ____cacheline_aligned_in_smp;
+ u64 wake;
+ u64 cqe_err;
+};
+
+struct mlx5e_xdpsq_stats {
+ u64 xmit;
+ u64 full;
+ u64 err;
+ /* dirtied @completion */
+ u64 cqes ____cacheline_aligned_in_smp;
+};
+
+struct mlx5e_ch_stats {
+ u64 events;
+ u64 poll;
+ u64 arm;
+ u64 aff_change;
+ u64 eq_rearm;
+};
+
+struct mlx5e_stats {
+ struct mlx5e_sw_stats sw;
+ struct mlx5e_qcounter_stats qcnt;
+ struct mlx5e_vnic_env_stats vnic;
+ struct mlx5e_vport_stats vport;
+ struct mlx5e_pport_stats pport;
+ struct rtnl_link_stats64 vf_vport;
+ struct mlx5e_pcie_stats pcie;
+};
+
+enum {
+ MLX5E_NDO_UPDATE_STATS = BIT(0x1),
+};
+
+struct mlx5e_priv;
+struct mlx5e_stats_grp {
+ u16 update_stats_mask;
+ int (*get_num_stats)(struct mlx5e_priv *priv);
+ int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx);
+ int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx);
+ void (*update_stats)(struct mlx5e_priv *priv);
+};
+
+extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
+extern const int mlx5e_num_stats_grps;
+
+void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
new file mode 100644
index 000000000..305085377
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -0,0 +1,3063 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/flow_dissector.h>
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/device.h>
+#include <linux/rhashtable.h>
+#include <net/switchdev.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/vxlan.h>
+#include <net/arp.h>
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+#include "eswitch.h"
+#include "lib/vxlan.h"
+#include "fs_core.h"
+#include "en/port.h"
+
+struct mlx5_nic_flow_attr {
+ u32 action;
+ u32 flow_tag;
+ u32 mod_hdr_id;
+ u32 hairpin_tirn;
+ u8 match_level;
+ struct mlx5_flow_table *hairpin_ft;
+};
+
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
+
+enum {
+ MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
+ MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
+ MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE),
+ MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1),
+ MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
+ MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
+ MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
+};
+
+#define MLX5E_TC_MAX_SPLITS 1
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ struct mlx5e_priv *priv;
+ u64 cookie;
+ u8 flags;
+ struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+ struct list_head encap; /* flows sharing the same encap ID */
+ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
+ struct list_head hairpin; /* flows sharing the same hairpin */
+ union {
+ struct mlx5_esw_flow_attr esw_attr[0];
+ struct mlx5_nic_flow_attr nic_attr[0];
+ };
+};
+
+struct mlx5e_tc_flow_parse_attr {
+ struct ip_tunnel_info tun_info;
+ struct mlx5_flow_spec spec;
+ int num_mod_hdr_actions;
+ int max_mod_hdr_actions;
+ void *mod_hdr_actions;
+ int mirred_ifindex;
+};
+
+enum {
+ MLX5_HEADER_TYPE_VXLAN = 0x0,
+ MLX5_HEADER_TYPE_NVGRE = 0x1,
+};
+
+#define MLX5E_TC_TABLE_NUM_GROUPS 4
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
+
+struct mlx5e_hairpin {
+ struct mlx5_hairpin *pair;
+
+ struct mlx5_core_dev *func_mdev;
+ struct mlx5e_priv *func_priv;
+ u32 tdn;
+ u32 tirn;
+
+ int num_channels;
+ struct mlx5e_rqt indir_rqt;
+ u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_ttc_table ttc;
+};
+
+struct mlx5e_hairpin_entry {
+ /* a node of a hash table which keeps all the hairpin entries */
+ struct hlist_node hairpin_hlist;
+
+ /* flows sharing the same hairpin */
+ struct list_head flows;
+
+ u16 peer_vhca_id;
+ u8 prio;
+ struct mlx5e_hairpin *hp;
+};
+
+struct mod_hdr_key {
+ int num_actions;
+ void *actions;
+};
+
+struct mlx5e_mod_hdr_entry {
+ /* a node of a hash table which keeps all the mod_hdr entries */
+ struct hlist_node mod_hdr_hlist;
+
+ /* flows sharing the same mod_hdr entry */
+ struct list_head flows;
+
+ struct mod_hdr_key key;
+
+ u32 mod_hdr_id;
+};
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+
+static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
+{
+ return jhash(key->actions,
+ key->num_actions * MLX5_MH_ACT_SZ, 0);
+}
+
+static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
+ struct mod_hdr_key *b)
+{
+ if (a->num_actions != b->num_actions)
+ return 1;
+
+ return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
+}
+
+static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int num_actions, actions_size, namespace, err;
+ struct mlx5e_mod_hdr_entry *mh;
+ struct mod_hdr_key key;
+ bool found = false;
+ u32 hash_key;
+
+ num_actions = parse_attr->num_mod_hdr_actions;
+ actions_size = MLX5_MH_ACT_SZ * num_actions;
+
+ key.actions = parse_attr->mod_hdr_actions;
+ key.num_actions = num_actions;
+
+ hash_key = hash_mod_hdr_info(&key);
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ namespace = MLX5_FLOW_NAMESPACE_FDB;
+ hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
+ mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, &key)) {
+ found = true;
+ break;
+ }
+ }
+ } else {
+ namespace = MLX5_FLOW_NAMESPACE_KERNEL;
+ hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
+ mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, &key)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ if (found)
+ goto attach_flow;
+
+ mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
+ if (!mh)
+ return -ENOMEM;
+
+ mh->key.actions = (void *)mh + sizeof(*mh);
+ memcpy(mh->key.actions, key.actions, actions_size);
+ mh->key.num_actions = num_actions;
+ INIT_LIST_HEAD(&mh->flows);
+
+ err = mlx5_modify_header_alloc(priv->mdev, namespace,
+ mh->key.num_actions,
+ mh->key.actions,
+ &mh->mod_hdr_id);
+ if (err)
+ goto out_err;
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
+ else
+ hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
+
+attach_flow:
+ list_add(&flow->mod_hdr, &mh->flows);
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
+ else
+ flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
+
+ return 0;
+
+out_err:
+ kfree(mh);
+ return err;
+}
+
+static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct list_head *next = flow->mod_hdr.next;
+
+ list_del(&flow->mod_hdr);
+
+ if (list_empty(next)) {
+ struct mlx5e_mod_hdr_entry *mh;
+
+ mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
+
+ mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
+ hash_del(&mh->mod_hdr_hlist);
+ kfree(mh);
+ }
+}
+
+static
+struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
+{
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+
+ netdev = __dev_get_by_index(net, ifindex);
+ priv = netdev_priv(netdev);
+ return priv->mdev;
+}
+
+static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
+ void *tirc;
+ int err;
+
+ err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
+ if (err)
+ goto alloc_tdn_err;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
+ MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+
+ err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
+ if (err)
+ goto create_tir_err;
+
+ return 0;
+
+create_tir_err:
+ mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
+alloc_tdn_err:
+ return err;
+}
+
+static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
+{
+ mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
+ mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
+}
+
+static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
+{
+ u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
+ struct mlx5e_priv *priv = hp->func_priv;
+ int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
+
+ mlx5e_build_default_indir_rqt(indirection_rqt, sz,
+ hp->num_channels);
+
+ for (i = 0; i < sz; i++) {
+ ix = i;
+ if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(i, ilog2(sz));
+ ix = indirection_rqt[ix];
+ rqn = hp->pair->rqn[ix];
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
+ }
+}
+
+static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
+{
+ int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
+ struct mlx5e_priv *priv = hp->func_priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqtc;
+ u32 *in;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+ mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
+
+ err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
+ if (!err)
+ hp->indir_rqt.enabled = true;
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+ u32 in[MLX5_ST_SZ_DW(create_tir_in)];
+ int tt, i, err;
+ void *tirc;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+
+ MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+
+ err = mlx5_core_create_tir(hp->func_mdev, in,
+ MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
+ if (err) {
+ mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
+ goto err_destroy_tirs;
+ }
+ }
+ return 0;
+
+err_destroy_tirs:
+ for (i = 0; i < tt; i++)
+ mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
+ return err;
+}
+
+static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
+}
+
+static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
+
+ memset(ttc_params, 0, sizeof(*ttc_params));
+
+ ttc_params->any_tt_tirn = hp->tirn;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
+
+ ft_attr->max_fte = MLX5E_NUM_TT;
+ ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_TC_PRIO;
+}
+
+static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+ struct ttc_params ttc_params;
+ int err;
+
+ err = mlx5e_hairpin_create_indirect_rqt(hp);
+ if (err)
+ return err;
+
+ err = mlx5e_hairpin_create_indirect_tirs(hp);
+ if (err)
+ goto err_create_indirect_tirs;
+
+ mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
+ err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
+ if (err)
+ goto err_create_ttc_table;
+
+ netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
+ hp->num_channels, hp->ttc.ft.t->id);
+
+ return 0;
+
+err_create_ttc_table:
+ mlx5e_hairpin_destroy_indirect_tirs(hp);
+err_create_indirect_tirs:
+ mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+
+ return err;
+}
+
+static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+
+ mlx5e_destroy_ttc_table(priv, &hp->ttc);
+ mlx5e_hairpin_destroy_indirect_tirs(hp);
+ mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+}
+
+static struct mlx5e_hairpin *
+mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
+ int peer_ifindex)
+{
+ struct mlx5_core_dev *func_mdev, *peer_mdev;
+ struct mlx5e_hairpin *hp;
+ struct mlx5_hairpin *pair;
+ int err;
+
+ hp = kzalloc(sizeof(*hp), GFP_KERNEL);
+ if (!hp)
+ return ERR_PTR(-ENOMEM);
+
+ func_mdev = priv->mdev;
+ peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+
+ pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
+ if (IS_ERR(pair)) {
+ err = PTR_ERR(pair);
+ goto create_pair_err;
+ }
+ hp->pair = pair;
+ hp->func_mdev = func_mdev;
+ hp->func_priv = priv;
+ hp->num_channels = params->num_channels;
+
+ err = mlx5e_hairpin_create_transport(hp);
+ if (err)
+ goto create_transport_err;
+
+ if (hp->num_channels > 1) {
+ err = mlx5e_hairpin_rss_init(hp);
+ if (err)
+ goto rss_init_err;
+ }
+
+ return hp;
+
+rss_init_err:
+ mlx5e_hairpin_destroy_transport(hp);
+create_transport_err:
+ mlx5_core_hairpin_destroy(hp->pair);
+create_pair_err:
+ kfree(hp);
+ return ERR_PTR(err);
+}
+
+static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
+{
+ if (hp->num_channels > 1)
+ mlx5e_hairpin_rss_cleanup(hp);
+ mlx5e_hairpin_destroy_transport(hp);
+ mlx5_core_hairpin_destroy(hp->pair);
+ kvfree(hp);
+}
+
+static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
+{
+ return (peer_vhca_id << 16 | prio);
+}
+
+static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
+ u16 peer_vhca_id, u8 prio)
+{
+ struct mlx5e_hairpin_entry *hpe;
+ u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
+
+ hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
+ hairpin_hlist, hash_key) {
+ if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
+ return hpe;
+ }
+
+ return NULL;
+}
+
+#define UNKNOWN_MATCH_PRIO 8
+
+static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec, u8 *match_prio)
+{
+ void *headers_c, *headers_v;
+ u8 prio_val, prio_mask = 0;
+ bool vlan_present;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
+ netdev_warn(priv->netdev,
+ "only PCP trust state supported for hairpin\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+
+ vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
+ if (vlan_present) {
+ prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ }
+
+ if (!vlan_present || !prio_mask) {
+ prio_val = UNKNOWN_MATCH_PRIO;
+ } else if (prio_mask != 0x7) {
+ netdev_warn(priv->netdev,
+ "masked priority match not supported for hairpin\n");
+ return -EOPNOTSUPP;
+ }
+
+ *match_prio = prio_val;
+ return 0;
+}
+
+static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ int peer_ifindex = parse_attr->mirred_ifindex;
+ struct mlx5_hairpin_params params;
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5e_hairpin_entry *hpe;
+ struct mlx5e_hairpin *hp;
+ u64 link_speed64;
+ u32 link_speed;
+ u8 match_prio;
+ u16 peer_id;
+ int err;
+
+ peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
+ netdev_warn(priv->netdev, "hairpin is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
+ err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio);
+ if (err)
+ return err;
+ hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
+ if (hpe)
+ goto attach_flow;
+
+ hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
+ if (!hpe)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&hpe->flows);
+ hpe->peer_vhca_id = peer_id;
+ hpe->prio = match_prio;
+
+ params.log_data_size = 15;
+ params.log_data_size = min_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
+ params.log_data_size = max_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
+
+ params.log_num_packets = params.log_data_size -
+ MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
+ params.log_num_packets = min_t(u8, params.log_num_packets,
+ MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
+
+ params.q_counter = priv->q_counter;
+ /* set hairpin pair per each 50Gbs share of the link */
+ mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
+ link_speed = max_t(u32, link_speed, 50000);
+ link_speed64 = link_speed;
+ do_div(link_speed64, 50000);
+ params.num_channels = link_speed64;
+
+ hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
+ if (IS_ERR(hp)) {
+ err = PTR_ERR(hp);
+ goto create_hairpin_err;
+ }
+
+ netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
+ hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name,
+ hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
+
+ hpe->hp = hp;
+ hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
+ hash_hairpin_info(peer_id, match_prio));
+
+attach_flow:
+ if (hpe->hp->num_channels > 1) {
+ flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
+ flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+ } else {
+ flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
+ }
+ list_add(&flow->hairpin, &hpe->flows);
+
+ return 0;
+
+create_hairpin_err:
+ kfree(hpe);
+ return err;
+}
+
+static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct list_head *next = flow->hairpin.next;
+
+ list_del(&flow->hairpin);
+
+ /* no more hairpin flows for us, release the hairpin pair */
+ if (list_empty(next)) {
+ struct mlx5e_hairpin_entry *hpe;
+
+ hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
+
+ netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
+ hpe->hp->pair->peer_mdev->priv.name);
+
+ mlx5e_hairpin_destroy(hpe->hp);
+ hash_del(&hpe->hairpin_hlist);
+ kfree(hpe);
+ }
+}
+
+static struct mlx5_flow_handle *
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_nic_flow_attr *attr = flow->nic_attr;
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_flow_act flow_act = {
+ .action = attr->action,
+ .has_flow_tag = true,
+ .flow_tag = attr->flow_tag,
+ .encap_id = 0,
+ };
+ struct mlx5_fc *counter = NULL;
+ struct mlx5_flow_handle *rule;
+ bool table_created = false;
+ int err, dest_ix = 0;
+
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_add_hairpin_flow;
+ }
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = attr->hairpin_ft;
+ } else {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest[dest_ix].tir_num = attr->hairpin_tirn;
+ }
+ dest_ix++;
+ } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = priv->fs.vlan.ft.t;
+ dest_ix++;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ counter = mlx5_fc_create(dev, true);
+ if (IS_ERR(counter)) {
+ rule = ERR_CAST(counter);
+ goto err_fc_create;
+ }
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[dest_ix].counter = counter;
+ dest_ix++;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ flow_act.modify_id = attr->mod_hdr_id;
+ kfree(parse_attr->mod_hdr_actions);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_create_mod_hdr_id;
+ }
+ }
+
+ if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
+ int tc_grp_size, tc_tbl_size;
+ u32 max_flow_counter;
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
+
+ tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
+ BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
+
+ priv->fs.tc.t =
+ mlx5_create_auto_grouped_flow_table(priv->fs.ns,
+ MLX5E_TC_PRIO,
+ tc_tbl_size,
+ MLX5E_TC_TABLE_NUM_GROUPS,
+ MLX5E_TC_FT_LEVEL, 0);
+ if (IS_ERR(priv->fs.tc.t)) {
+ netdev_err(priv->netdev,
+ "Failed to create tc offload table\n");
+ rule = ERR_CAST(priv->fs.tc.t);
+ goto err_create_ft;
+ }
+
+ table_created = true;
+ }
+
+ if (attr->match_level != MLX5_MATCH_NONE)
+ parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
+ &flow_act, dest, dest_ix);
+
+ if (IS_ERR(rule))
+ goto err_add_rule;
+
+ return rule;
+
+err_add_rule:
+ if (table_created) {
+ mlx5_destroy_flow_table(priv->fs.tc.t);
+ priv->fs.tc.t = NULL;
+ }
+err_create_ft:
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+err_create_mod_hdr_id:
+ mlx5_fc_destroy(dev, counter);
+err_fc_create:
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+ mlx5e_hairpin_flow_del(priv, flow);
+err_add_hairpin_flow:
+ return rule;
+}
+
+static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_nic_flow_attr *attr = flow->nic_attr;
+ struct mlx5_fc *counter = NULL;
+
+ counter = mlx5_flow_rule_counter(flow->rule[0]);
+ mlx5_del_flow_rules(flow->rule[0]);
+ mlx5_fc_destroy(priv->mdev, counter);
+
+ if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
+ mlx5_destroy_flow_table(priv->fs.tc.t);
+ priv->fs.tc.t = NULL;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+ mlx5e_hairpin_flow_del(priv, flow);
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct ip_tunnel_info *tun_info,
+ struct net_device *mirred_dev,
+ struct net_device **encap_dev,
+ struct mlx5e_tc_flow *flow);
+
+static struct mlx5_flow_handle *
+mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct net_device *out_dev, *encap_dev = NULL;
+ struct mlx5_flow_handle *rule = NULL;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *out_priv;
+ int err;
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ out_dev = __dev_get_by_index(dev_net(priv->netdev),
+ attr->parse_attr->mirred_ifindex);
+ err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
+ out_dev, &encap_dev, flow);
+ if (err) {
+ rule = ERR_PTR(err);
+ if (err != -EAGAIN)
+ goto err_attach_encap;
+ }
+ out_priv = netdev_priv(encap_dev);
+ rpriv = out_priv->ppriv;
+ attr->out_rep[attr->out_count] = rpriv->rep;
+ attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ }
+
+ err = mlx5_eswitch_add_vlan_action(esw, attr);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_add_vlan;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ kfree(parse_attr->mod_hdr_actions);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_mod_hdr;
+ }
+ }
+
+ /* we get here if (1) there's no error (rule being null) or when
+ * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
+ */
+ if (rule != ERR_PTR(-EAGAIN)) {
+ rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
+ if (IS_ERR(rule))
+ goto err_add_rule;
+
+ if (attr->mirror_count) {
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr);
+ if (IS_ERR(flow->rule[1]))
+ goto err_fwd_rule;
+ }
+ }
+ return rule;
+
+err_fwd_rule:
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+ rule = flow->rule[1];
+err_add_rule:
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+err_mod_hdr:
+ mlx5_eswitch_del_vlan_action(esw, attr);
+err_add_vlan:
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+ mlx5e_detach_encap(priv, flow);
+err_attach_encap:
+ return rule;
+}
+
+static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+
+ if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+ if (attr->mirror_count)
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ }
+
+ mlx5_eswitch_del_vlan_action(esw, attr);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ mlx5e_detach_encap(priv, flow);
+ kvfree(attr->parse_attr);
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+ e->encap_size, e->encap_header,
+ &e->encap_id);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
+ err);
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, &e->flows, encap) {
+ esw_attr = flow->esw_attr;
+ esw_attr->encap_id = e->encap_id;
+ flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ if (esw_attr->mirror_count) {
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
+ if (IS_ERR(flow->rule[1])) {
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr);
+ err = PTR_ERR(flow->rule[1]);
+ mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n",
+ err);
+ continue;
+ }
+ }
+
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, &e->flows, encap) {
+ if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+
+ flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+ if (attr->mirror_count)
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ }
+ }
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_encap_dealloc(priv->mdev, e->encap_id);
+ }
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5e_encap_entry *e;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = &nd_tbl;
+#endif
+ else
+ return;
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
+ continue;
+ list_for_each_entry(flow, &e->flows, encap) {
+ if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ counter = mlx5_flow_rule_counter(flow->rule[0]);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ if (neigh_used)
+ break;
+ }
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct list_head *next = flow->encap.next;
+
+ list_del(&flow->encap);
+ if (list_empty(next)) {
+ struct mlx5e_encap_entry *e;
+
+ e = list_entry(next, struct mlx5e_encap_entry, flows);
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_encap_dealloc(priv->mdev, e->encap_id);
+
+ hash_del_rcu(&e->encap_hlist);
+ kfree(e->encap_header);
+ kfree(e);
+ }
+}
+
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ else
+ mlx5e_tc_del_nic_flow(priv, flow);
+}
+
+static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->key);
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(key->keyid));
+ }
+}
+
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+
+ struct flow_dissector_key_control *enc_control =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ f->key);
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->mask);
+
+ /* Full udp dst port must be given */
+ if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
+ goto vxlan_match_offload_err;
+
+ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+ parse_vxlan_attr(spec, f);
+ else {
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(key->dst));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(key->src));
+ } else { /* udp dst port must be given */
+vxlan_match_offload_err:
+ netdev_warn(priv->netdev,
+ "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ f->mask);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(key->dst));
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+ } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_dissector_key_ip *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IP,
+ f->key);
+ struct flow_dissector_key_ip *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IP,
+ f->mask);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+ }
+
+ /* Enforce DMAC when offloading incoming tunneled flows.
+ * Flow counters require a match on the DMAC.
+ */
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16), priv->netdev->dev_addr);
+
+ /* let software handle IP fragments */
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+
+ return 0;
+}
+
+static int __parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ u8 *match_level)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ *match_level = MLX5_MATCH_NONE;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_CVLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_TCP) |
+ BIT(FLOW_DISSECTOR_KEY_IP) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
+ netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
+ f->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if ((dissector_uses_key(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
+ dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
+ dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
+ dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ f->key);
+ switch (key->addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ if (parse_tunnel_attr(priv, spec, f))
+ return -EOPNOTSUPP;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* In decap flow, header pointers should point to the inner
+ * headers, outer header were already set by parse_tunnel_attr
+ */
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ inner_headers);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ ntohs(mask->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ntohs(key->n_proto));
+
+ if (mask->n_proto)
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+ if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
+ if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ svlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ svlan_tag, 1);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+ }
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
+
+ *match_level = MLX5_MATCH_L2;
+ }
+ } else if (*match_level != MLX5_MATCH_NONE) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CVLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CVLAN,
+ f->mask);
+ if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
+ if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_svlan_tag, 1);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ outer_second_svlan_tag, 1);
+ } else {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_cvlan_tag, 1);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ outer_second_cvlan_tag, 1);
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
+ mask->vlan_id);
+ MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
+ key->vlan_id);
+ MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
+ mask->vlan_priority);
+ MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
+ key->vlan_priority);
+
+ *match_level = MLX5_MATCH_L2;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ mask->dst);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ key->dst);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ mask->src);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ key->src);
+
+ if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+
+ struct flow_dissector_key_control *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->mask);
+ addr_type = key->addr_type;
+
+ /* the HW doesn't support frag first/later */
+ if (mask->flags & FLOW_DIS_FIRST_FRAG)
+ return -EOPNOTSUPP;
+
+ if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+ key->flags & FLOW_DIS_IS_FRAGMENT);
+
+ /* the HW doesn't need L3 inline to match on frag=no */
+ if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
+ *match_level = MLX5_MATCH_L2;
+ /* *** L2 attributes parsing up to here *** */
+ else
+ *match_level = MLX5_MATCH_L3;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ ip_proto = key->ip_proto;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ key->ip_proto);
+
+ if (mask->ip_proto)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &mask->src, sizeof(mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &key->src, sizeof(key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &mask->dst, sizeof(mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &key->dst, sizeof(key->dst));
+
+ if (mask->src || mask->dst)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &mask->src, sizeof(mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &key->src, sizeof(key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &mask->dst, sizeof(mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &key->dst, sizeof(key->dst));
+
+ if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
+ ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_dissector_key_ip *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->key);
+ struct flow_dissector_key_ip *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->mask);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+
+ if (mask->ttl &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_ipv4_ttl))
+ return -EOPNOTSUPP;
+
+ if (mask->tos || mask->ttl)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ /* *** L3 attributes parsing up to here *** */
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(key->dst));
+ break;
+ default:
+ netdev_err(priv->netdev,
+ "Only UDP and TCP transport are supported\n");
+ return -EINVAL;
+ }
+
+ if (mask->src || mask->dst)
+ *match_level = MLX5_MATCH_L4;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ f->key);
+ struct flow_dissector_key_tcp *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ f->mask);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(key->flags));
+
+ if (mask->flags)
+ *match_level = MLX5_MATCH_L4;
+ }
+
+ return 0;
+}
+
+static int parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+ u8 match_level;
+ int err;
+
+ err = __parse_cls_flower(priv, spec, f, &match_level);
+
+ if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
+ rep = rpriv->rep;
+ if (rep->vport != FDB_UPLINK_VPORT &&
+ (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
+ esw->offloads.inline_mode < match_level)) {
+ netdev_warn(priv->netdev,
+ "Flow is not offloaded due to min inline setting, required %d actual %d\n",
+ match_level, esw->offloads.inline_mode);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ flow->esw_attr->match_level = match_level;
+ else
+ flow->nic_attr->match_level = match_level;
+
+ return err;
+}
+
+struct pedit_headers {
+ struct ethhdr eth;
+ struct iphdr ip4;
+ struct ipv6hdr ip6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+};
+
+static int pedit_header_offsets[] = {
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+ struct pedit_headers *masks,
+ struct pedit_headers *vals)
+{
+ u32 *curr_pmask, *curr_pval;
+
+ if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
+ goto out_err;
+
+ curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset);
+
+ if (*curr_pmask & mask) /* disallow acting twice on the same location */
+ goto out_err;
+
+ *curr_pmask |= mask;
+ *curr_pval |= (val & mask);
+
+ return 0;
+
+out_err:
+ return -EOPNOTSUPP;
+}
+
+struct mlx5_fields {
+ u8 field;
+ u8 size;
+ u32 offset;
+};
+
+#define OFFLOAD(fw_field, size, field, off) \
+ {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
+
+static struct mlx5_fields fields[] = {
+ OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
+ OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0),
+ OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
+ OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0),
+ OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0),
+
+ OFFLOAD(IP_TTL, 1, ip4.ttl, 0),
+ OFFLOAD(SIPV4, 4, ip4.saddr, 0),
+ OFFLOAD(DIPV4, 4, ip4.daddr, 0),
+
+ OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
+ OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0),
+ OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0),
+ OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0),
+ OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
+ OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0),
+ OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0),
+ OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0),
+ OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
+
+ OFFLOAD(TCP_SPORT, 2, tcp.source, 0),
+ OFFLOAD(TCP_DPORT, 2, tcp.dest, 0),
+ OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),
+
+ OFFLOAD(UDP_SPORT, 2, udp.source, 0),
+ OFFLOAD(UDP_DPORT, 2, udp.dest, 0),
+};
+
+/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
+ * max from the SW pedit action. On success, attr->num_mod_hdr_actions
+ * says how many HW actions were actually parsed.
+ */
+static int offload_pedit_fields(struct pedit_headers *masks,
+ struct pedit_headers *vals,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+ int i, action_size, nactions, max_actions, first, last, next_z;
+ void *s_masks_p, *a_masks_p, *vals_p;
+ struct mlx5_fields *f;
+ u8 cmd, field_bsize;
+ u32 s_mask, a_mask;
+ unsigned long mask;
+ __be32 mask_be32;
+ __be16 mask_be16;
+ void *action;
+
+ set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
+ add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
+ set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
+ add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
+
+ action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+ action = parse_attr->mod_hdr_actions +
+ parse_attr->num_mod_hdr_actions * action_size;
+
+ max_actions = parse_attr->max_mod_hdr_actions;
+ nactions = parse_attr->num_mod_hdr_actions;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ /* avoid seeing bits set from previous iterations */
+ s_mask = 0;
+ a_mask = 0;
+
+ s_masks_p = (void *)set_masks + f->offset;
+ a_masks_p = (void *)add_masks + f->offset;
+
+ memcpy(&s_mask, s_masks_p, f->size);
+ memcpy(&a_mask, a_masks_p, f->size);
+
+ if (!s_mask && !a_mask) /* nothing to offload here */
+ continue;
+
+ if (s_mask && a_mask) {
+ printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
+ return -EOPNOTSUPP;
+ }
+
+ if (nactions == max_actions) {
+ printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
+ return -EOPNOTSUPP;
+ }
+
+ if (s_mask) {
+ cmd = MLX5_ACTION_TYPE_SET;
+ mask = s_mask;
+ vals_p = (void *)set_vals + f->offset;
+ /* clear to denote we consumed this field */
+ memset(s_masks_p, 0, f->size);
+ } else {
+ cmd = MLX5_ACTION_TYPE_ADD;
+ mask = a_mask;
+ vals_p = (void *)add_vals + f->offset;
+ /* clear to denote we consumed this field */
+ memset(a_masks_p, 0, f->size);
+ }
+
+ field_bsize = f->size * BITS_PER_BYTE;
+
+ if (field_bsize == 32) {
+ mask_be32 = *(__be32 *)&mask;
+ mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
+ } else if (field_bsize == 16) {
+ mask_be16 = *(__be16 *)&mask;
+ mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+ }
+
+ first = find_first_bit(&mask, field_bsize);
+ next_z = find_next_zero_bit(&mask, field_bsize, first);
+ last = find_last_bit(&mask, field_bsize);
+ if (first < next_z && next_z < last) {
+ printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
+ mask);
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(set_action_in, action, action_type, cmd);
+ MLX5_SET(set_action_in, action, field, f->field);
+
+ if (cmd == MLX5_ACTION_TYPE_SET) {
+ MLX5_SET(set_action_in, action, offset, first);
+ /* length is num of bits to be written, zero means length of 32 */
+ MLX5_SET(set_action_in, action, length, (last - first + 1));
+ }
+
+ if (field_bsize == 32)
+ MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
+ else if (field_bsize == 16)
+ MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
+ else if (field_bsize == 8)
+ MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
+
+ action += action_size;
+ nactions++;
+ }
+
+ parse_attr->num_mod_hdr_actions = nactions;
+ return 0;
+}
+
+static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
+ const struct tc_action *a, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ int nkeys, action_size, max_actions;
+
+ nkeys = tcf_pedit_nkeys(a);
+ action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
+
+ /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
+ max_actions = min(max_actions, nkeys * 16);
+
+ parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
+ if (!parse_attr->mod_hdr_actions)
+ return -ENOMEM;
+
+ parse_attr->max_mod_hdr_actions = max_actions;
+ return 0;
+}
+
+static const struct pedit_headers zero_masks = {};
+
+static int parse_tc_pedit_action(struct mlx5e_priv *priv,
+ const struct tc_action *a, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
+ int nkeys, i, err = -EOPNOTSUPP;
+ u32 mask, val, offset;
+ u8 cmd, htype;
+
+ nkeys = tcf_pedit_nkeys(a);
+
+ memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+ memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+
+ for (i = 0; i < nkeys; i++) {
+ htype = tcf_pedit_htype(a, i);
+ cmd = tcf_pedit_cmd(a, i);
+ err = -EOPNOTSUPP; /* can't be all optimistic */
+
+ if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
+ netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n");
+ goto out_err;
+ }
+
+ if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
+ netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd);
+ goto out_err;
+ }
+
+ mask = tcf_pedit_mask(a, i);
+ val = tcf_pedit_val(a, i);
+ offset = tcf_pedit_offset(a, i);
+
+ err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
+ if (err)
+ goto out_err;
+ }
+
+ if (!parse_attr->mod_hdr_actions) {
+ err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
+ if (err)
+ goto out_err;
+ }
+
+ err = offload_pedit_fields(masks, vals, parse_attr);
+ if (err < 0)
+ goto out_dealloc_parsed_actions;
+
+ for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
+ cmd_masks = &masks[cmd];
+ if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+ netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
+ print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
+ 16, 1, cmd_masks, sizeof(zero_masks), true);
+ err = -EOPNOTSUPP;
+ goto out_dealloc_parsed_actions;
+ }
+ }
+
+ return 0;
+
+out_dealloc_parsed_actions:
+ kfree(parse_attr->mod_hdr_actions);
+out_err:
+ return err;
+}
+
+static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
+{
+ u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+ TCA_CSUM_UPDATE_FLAG_UDP;
+
+ /* The HW recalcs checksums only if re-writing headers */
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ netdev_warn(priv->netdev,
+ "TC csum action is only offloaded with pedit\n");
+ return false;
+ }
+
+ if (update_flags & ~prot_flags) {
+ netdev_warn(priv->netdev,
+ "can't offload TC csum action for some header/s - flags %#x\n",
+ update_flags);
+ return false;
+ }
+
+ return true;
+}
+
+static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
+ struct tcf_exts *exts)
+{
+ const struct tc_action *a;
+ bool modify_ip_header;
+ LIST_HEAD(actions);
+ u8 htype, ip_proto;
+ void *headers_v;
+ u16 ethertype;
+ int nkeys, i;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+ ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+
+ /* for non-IP we only re-write MACs, so we're okay */
+ if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
+ goto out_ok;
+
+ modify_ip_header = false;
+ tcf_exts_for_each_action(i, a, exts) {
+ int k;
+
+ if (!is_tcf_pedit(a))
+ continue;
+
+ nkeys = tcf_pedit_nkeys(a);
+ for (k = 0; k < nkeys; k++) {
+ htype = tcf_pedit_htype(a, k);
+ if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 ||
+ htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) {
+ modify_ip_header = true;
+ break;
+ }
+ }
+ }
+
+ ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
+ if (modify_ip_header && ip_proto != IPPROTO_TCP &&
+ ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
+ pr_info("can't offload re-write of ip proto %d\n", ip_proto);
+ return false;
+ }
+
+out_ok:
+ return true;
+}
+
+static bool actions_match_supported(struct mlx5e_priv *priv,
+ struct tcf_exts *exts,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ u32 actions;
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ actions = flow->esw_attr->action;
+ else
+ actions = flow->nic_attr->action;
+
+ if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
+ !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP))
+ return false;
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return modify_header_match_supported(&parse_attr->spec, exts);
+
+ return true;
+}
+
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+{
+ struct mlx5_core_dev *fmdev, *pmdev;
+ u64 fsystem_guid, psystem_guid;
+
+ fmdev = priv->mdev;
+ pmdev = peer_priv->mdev;
+
+ mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
+ mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
+
+ return (fsystem_guid == psystem_guid);
+}
+
+static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_nic_flow_attr *attr = flow->nic_attr;
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+ u32 action = 0;
+ int err, i;
+
+ if (!tcf_exts_has_actions(exts))
+ return -EINVAL;
+
+ attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+
+ tcf_exts_for_each_action(i, a, exts) {
+ if (is_tcf_gact_shot(a)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ if (MLX5_CAP_FLOWTABLE(priv->mdev,
+ flow_table_properties_nic_receive.flow_counter))
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ continue;
+ }
+
+ if (is_tcf_pedit(a)) {
+ err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
+ parse_attr);
+ if (err)
+ return err;
+
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ continue;
+ }
+
+ if (is_tcf_csum(a)) {
+ if (csum_offload_supported(priv, action,
+ tcf_csum_update_flags(a)))
+ continue;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (is_tcf_mirred_egress_redirect(a)) {
+ struct net_device *peer_dev = tcf_mirred_dev(a);
+
+ if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
+ same_hw_devs(priv, netdev_priv(peer_dev))) {
+ parse_attr->mirred_ifindex = peer_dev->ifindex;
+ flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ } else {
+ netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
+ peer_dev->name);
+ return -EINVAL;
+ }
+ continue;
+ }
+
+ if (is_tcf_skbedit_mark(a)) {
+ u32 mark = tcf_skbedit_mark(a);
+
+ if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
+ mark);
+ return -EINVAL;
+ }
+
+ attr->flow_tag = mark;
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ continue;
+ }
+
+ return -EINVAL;
+ }
+
+ attr->action = action;
+ if (!actions_match_supported(priv, exts, parse_attr, flow))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static inline int cmp_encap_info(struct ip_tunnel_key *a,
+ struct ip_tunnel_key *b)
+{
+ return memcmp(a, b, sizeof(*a));
+}
+
+static inline int hash_encap_info(struct ip_tunnel_key *key)
+{
+ return jhash(key, sizeof(*key), 0);
+}
+
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct flowi4 *fl4,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct rtable *rt;
+ struct neighbour *n = NULL;
+
+#if IS_ENABLED(CONFIG_INET)
+ int ret;
+
+ rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ /* if the egress device isn't on the same HW e-switch, we use the uplink */
+ if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
+ *out_dev = uplink_rpriv->netdev;
+ else
+ *out_dev = rt->dst.dev;
+
+ if (!(*out_ttl))
+ *out_ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+ ip_rt_put(rt);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
+{
+ struct mlx5e_priv *peer_priv;
+
+ peer_priv = netdev_priv(peer_netdev);
+
+ return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
+ (priv->netdev->netdev_ops == peer_netdev->netdev_ops) &&
+ same_hw_devs(priv, peer_priv) &&
+ MLX5_VPORT_MANAGER(peer_priv->mdev) &&
+ (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct flowi6 *fl6,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct neighbour *n = NULL;
+ struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
+ NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ if (!(*out_ttl))
+ *out_ttl = ip6_dst_hoplimit(dst);
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ /* if the egress device isn't on the same HW e-switch, we use the uplink */
+ if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
+ *out_dev = uplink_rpriv->netdev;
+ else
+ *out_dev = dst->dev;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ n = dst_neigh_lookup(dst, &fl6->daddr);
+ dst_release(dst);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static void gen_vxlan_header_ipv4(struct net_device *out_dev,
+ char buf[], int encap_size,
+ unsigned char h_dest[ETH_ALEN],
+ u8 tos, u8 ttl,
+ __be32 daddr,
+ __be32 saddr,
+ __be16 udp_dst_port,
+ __be32 vx_vni)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
+ struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
+ struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+ memset(buf, 0, encap_size);
+
+ ether_addr_copy(eth->h_dest, h_dest);
+ ether_addr_copy(eth->h_source, out_dev->dev_addr);
+ eth->h_proto = htons(ETH_P_IP);
+
+ ip->daddr = daddr;
+ ip->saddr = saddr;
+
+ ip->tos = tos;
+ ip->ttl = ttl;
+ ip->protocol = IPPROTO_UDP;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+
+ udp->dest = udp_dst_port;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(vx_vni);
+}
+
+static void gen_vxlan_header_ipv6(struct net_device *out_dev,
+ char buf[], int encap_size,
+ unsigned char h_dest[ETH_ALEN],
+ u8 tos, u8 ttl,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr,
+ __be16 udp_dst_port,
+ __be32 vx_vni)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
+ struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
+ struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+ memset(buf, 0, encap_size);
+
+ ether_addr_copy(eth->h_dest, h_dest);
+ ether_addr_copy(eth->h_source, out_dev->dev_addr);
+ eth->h_proto = htons(ETH_P_IPV6);
+
+ ip6_flow_hdr(ip6h, tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+
+ udp->dest = udp_dst_port;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(vx_vni);
+}
+
+static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev;
+ struct neighbour *n = NULL;
+ struct flowi4 fl4 = {};
+ u8 nud_state, tos, ttl;
+ char *encap_header;
+ int err;
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ fl4.flowi4_proto = IPPROTO_UDP;
+ fl4.fl4_dport = tun_key->tp_dst;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto free_encap;
+ }
+
+ tos = tun_key->tos;
+ ttl = tun_key->ttl;
+
+ fl4.flowi4_tos = tun_key->tos;
+ fl4.daddr = tun_key->u.ipv4.dst;
+ fl4.saddr = tun_key->u.ipv4.src;
+
+ err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
+ &fl4, &n, &ttl);
+ if (err)
+ goto free_encap;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's importent to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ gen_vxlan_header_ipv4(out_dev, encap_header,
+ ipv4_encap_size, e->h_dest, tos, ttl,
+ fl4.daddr,
+ fl4.saddr, tun_key->tp_dst,
+ tunnel_id_to_key32(tun_key->tun_id));
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto destroy_neigh_entry;
+ }
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+ ipv4_encap_size, encap_header, &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev = NULL;
+ struct neighbour *n = NULL;
+ struct flowi6 fl6 = {};
+ u8 nud_state, tos, ttl;
+ char *encap_header;
+ int err;
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ fl6.flowi6_proto = IPPROTO_UDP;
+ fl6.fl6_dport = tun_key->tp_dst;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto free_encap;
+ }
+
+ tos = tun_key->tos;
+ ttl = tun_key->ttl;
+
+ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ fl6.daddr = tun_key->u.ipv6.dst;
+ fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
+ &fl6, &n, &ttl);
+ if (err)
+ goto free_encap;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's importent to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ gen_vxlan_header_ipv6(out_dev, encap_header,
+ ipv6_encap_size, e->h_dest, tos, ttl,
+ &fl6.daddr,
+ &fl6.saddr, tun_key->tp_dst,
+ tunnel_id_to_key32(tun_key->tun_id));
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto destroy_neigh_entry;
+ }
+
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+ ipv6_encap_size, encap_header, &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct ip_tunnel_info *tun_info,
+ struct net_device *mirred_dev,
+ struct net_device **encap_dev,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned short family = ip_tunnel_info_af(tun_info);
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct ip_tunnel_key *key = &tun_info->key;
+ struct mlx5e_encap_entry *e;
+ int tunnel_type, err = 0;
+ uintptr_t hash_key;
+ bool found = false;
+
+ /* udp dst port must be set */
+ if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
+ goto vxlan_encap_offload_err;
+
+ /* setting udp src port isn't supported */
+ if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
+vxlan_encap_offload_err:
+ netdev_warn(priv->netdev,
+ "must set udp dst port and not set udp src port\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
+ tunnel_type = MLX5_HEADER_TYPE_VXLAN;
+ } else {
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(key);
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ if (!cmp_encap_info(&e->tun_info.key, key)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* must verify if encap is valid or not */
+ if (found)
+ goto attach_flow;
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ e->tun_info = *tun_info;
+ e->tunnel_type = tunnel_type;
+ INIT_LIST_HEAD(&e->flows);
+
+ if (family == AF_INET)
+ err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
+
+ if (err && err != -EAGAIN)
+ goto out_err;
+
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+
+attach_flow:
+ list_add(&flow->encap, &e->flows);
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ attr->encap_id = e->encap_id;
+ else
+ err = -EAGAIN;
+
+ return err;
+
+out_err:
+ kfree(e);
+ return err;
+}
+
+static int parse_tc_vlan_action(struct mlx5e_priv *priv,
+ const struct tc_action *a,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action)
+{
+ u8 vlan_idx = attr->total_vlan;
+
+ if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
+ return -EOPNOTSUPP;
+
+ if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH))
+ return -EOPNOTSUPP;
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+ } else {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ }
+ } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
+ attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a);
+ attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a);
+ attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a);
+ if (!attr->vlan_proto[vlan_idx])
+ attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH))
+ return -EOPNOTSUPP;
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ } else {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+ (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
+ tcf_vlan_push_prio(a)))
+ return -EOPNOTSUPP;
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ }
+ } else { /* action is TCA_VLAN_ACT_MODIFY */
+ return -EOPNOTSUPP;
+ }
+
+ attr->total_vlan = vlan_idx + 1;
+
+ return 0;
+}
+
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct ip_tunnel_info *info = NULL;
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+ bool encap = false;
+ u32 action = 0;
+ int err, i;
+
+ if (!tcf_exts_has_actions(exts))
+ return -EINVAL;
+
+ attr->in_rep = rpriv->rep;
+ attr->in_mdev = priv->mdev;
+
+ tcf_exts_for_each_action(i, a, exts) {
+ if (is_tcf_gact_shot(a)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ continue;
+ }
+
+ if (is_tcf_pedit(a)) {
+ err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
+ parse_attr);
+ if (err)
+ return err;
+
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ attr->mirror_count = attr->out_count;
+ continue;
+ }
+
+ if (is_tcf_csum(a)) {
+ if (csum_offload_supported(priv, action,
+ tcf_csum_update_flags(a)))
+ continue;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) {
+ struct mlx5e_priv *out_priv;
+ struct net_device *out_dev;
+
+ out_dev = tcf_mirred_dev(a);
+
+ if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ pr_err("can't support more than %d output ports, can't offload forwarding\n",
+ attr->out_count);
+ return -EOPNOTSUPP;
+ }
+
+ if (switchdev_port_same_parent_id(priv->netdev,
+ out_dev) ||
+ is_merged_eswitch_dev(priv, out_dev)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ out_priv = netdev_priv(out_dev);
+ rpriv = out_priv->ppriv;
+ attr->out_rep[attr->out_count] = rpriv->rep;
+ attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ } else if (encap) {
+ parse_attr->mirred_ifindex = out_dev->ifindex;
+ parse_attr->tun_info = *info;
+ attr->parse_attr = parse_attr;
+ action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ /* attr->out_rep is resolved when we handle encap */
+ } else {
+ pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+ priv->netdev->name, out_dev->name);
+ return -EINVAL;
+ }
+ continue;
+ }
+
+ if (is_tcf_tunnel_set(a)) {
+ info = tcf_tunnel_info(a);
+ if (info)
+ encap = true;
+ else
+ return -EOPNOTSUPP;
+ attr->mirror_count = attr->out_count;
+ continue;
+ }
+
+ if (is_tcf_vlan(a)) {
+ err = parse_tc_vlan_action(priv, a, attr, &action);
+
+ if (err)
+ return err;
+
+ attr->mirror_count = attr->out_count;
+ continue;
+ }
+
+ if (is_tcf_tunnel_release(a)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ continue;
+ }
+
+ return -EINVAL;
+ }
+
+ attr->action = action;
+ if (!actions_match_supported(priv, exts, parse_attr, flow))
+ return -EOPNOTSUPP;
+
+ if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void get_flags(int flags, u8 *flow_flags)
+{
+ u8 __flow_flags = 0;
+
+ if (flags & MLX5E_TC_INGRESS)
+ __flow_flags |= MLX5E_TC_FLOW_INGRESS;
+ if (flags & MLX5E_TC_EGRESS)
+ __flow_flags |= MLX5E_TC_FLOW_EGRESS;
+
+ *flow_flags = __flow_flags;
+}
+
+static const struct rhashtable_params tc_ht_params = {
+ .head_offset = offsetof(struct mlx5e_tc_flow, node),
+ .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
+ .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
+ .automatic_shrinking = true,
+};
+
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ return &uplink_rpriv->tc_ht;
+ } else
+ return &priv->fs.tc.ht;
+}
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err = 0;
+ u8 flow_flags = 0;
+
+ get_flags(flags, &flow_flags);
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (flow) {
+ netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
+ return 0;
+ }
+
+ if (esw && esw->mode == SRIOV_OFFLOADS) {
+ flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
+ } else {
+ flow_flags |= MLX5E_TC_FLOW_NIC;
+ attr_size = sizeof(struct mlx5_nic_flow_attr);
+ }
+
+ flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
+ if (!parse_attr || !flow) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ flow->cookie = f->cookie;
+ flow->flags = flow_flags;
+ flow->priv = priv;
+
+ err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
+ if (err < 0)
+ goto err_free;
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
+ if (err < 0)
+ goto err_free;
+ flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
+ } else {
+ err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
+ if (err < 0)
+ goto err_free;
+ flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
+ }
+
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ if (err != -EAGAIN)
+ goto err_free;
+ }
+
+ if (err != -EAGAIN)
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+ !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
+ kvfree(parse_attr);
+
+ err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
+ if (err) {
+ mlx5e_tc_del_flow(priv, flow);
+ kfree(flow);
+ }
+
+ return err;
+
+err_free:
+ kvfree(parse_attr);
+ kfree(flow);
+ return err;
+}
+
+#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
+#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
+
+static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
+{
+ if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
+ return true;
+
+ return false;
+}
+
+int mlx5e_delete_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (!flow || !same_flow_direction(flow, flags))
+ return -EINVAL;
+
+ rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
+
+ mlx5e_tc_del_flow(priv, flow);
+
+ kfree(flow);
+
+ return 0;
+}
+
+int mlx5e_stats_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ u64 bytes;
+ u64 packets;
+ u64 lastuse;
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (!flow || !same_flow_direction(flow, flags))
+ return -EINVAL;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
+ return 0;
+
+ counter = mlx5_flow_rule_counter(flow->rule[0]);
+ if (!counter)
+ return 0;
+
+ mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+
+ tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+
+ return 0;
+}
+
+static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
+ struct mlx5e_priv *peer_priv)
+{
+ struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
+ struct mlx5e_hairpin_entry *hpe;
+ u16 peer_vhca_id;
+ int bkt;
+
+ if (!same_hw_devs(priv, peer_priv))
+ return;
+
+ peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
+
+ hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
+ if (hpe->peer_vhca_id == peer_vhca_id)
+ hpe->hp->pair->peer_gone = true;
+ }
+}
+
+static int mlx5e_tc_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_flow_steering *fs;
+ struct mlx5e_priv *peer_priv;
+ struct mlx5e_tc_table *tc;
+ struct mlx5e_priv *priv;
+
+ if (ndev->netdev_ops != &mlx5e_netdev_ops ||
+ event != NETDEV_UNREGISTER ||
+ ndev->reg_state == NETREG_REGISTERED)
+ return NOTIFY_DONE;
+
+ tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
+ fs = container_of(tc, struct mlx5e_flow_steering, tc);
+ priv = container_of(fs, struct mlx5e_priv, fs);
+ peer_priv = netdev_priv(ndev);
+ if (priv == peer_priv ||
+ !(priv->netdev->features & NETIF_F_HW_TC))
+ return NOTIFY_DONE;
+
+ mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
+
+ return NOTIFY_DONE;
+}
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
+ int err;
+
+ hash_init(tc->mod_hdr_tbl);
+ hash_init(tc->hairpin_tbl);
+
+ err = rhashtable_init(&tc->ht, &tc_ht_params);
+ if (err)
+ return err;
+
+ tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
+ if (register_netdevice_notifier(&tc->netdevice_nb)) {
+ tc->netdevice_nb.notifier_call = NULL;
+ mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
+ }
+
+ return err;
+}
+
+static void _mlx5e_tc_del_flow(void *ptr, void *arg)
+{
+ struct mlx5e_tc_flow *flow = ptr;
+ struct mlx5e_priv *priv = flow->priv;
+
+ mlx5e_tc_del_flow(priv, flow);
+ kfree(flow);
+}
+
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
+
+ if (tc->netdevice_nb.notifier_call)
+ unregister_netdevice_notifier(&tc->netdevice_nb);
+
+ rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
+
+ if (!IS_ERR_OR_NULL(tc->t)) {
+ mlx5_destroy_flow_table(tc->t);
+ tc->t = NULL;
+ }
+}
+
+int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+{
+ return rhashtable_init(tc_ht, &tc_ht_params);
+}
+
+void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
+{
+ rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+}
+
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+
+ return atomic_read(&tc_ht->nelems);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
new file mode 100644
index 000000000..49436bf3b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_EN_TC_H__
+#define __MLX5_EN_TC_H__
+
+#include <net/pkt_cls.h>
+
+#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+ MLX5E_TC_INGRESS = BIT(0),
+ MLX5E_TC_EGRESS = BIT(1),
+ MLX5E_TC_LAST_EXPORTED_BIT = 1,
+};
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
+void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags);
+int mlx5e_delete_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags);
+
+int mlx5e_stats_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags);
+
+struct mlx5e_encap_entry;
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+struct mlx5e_neigh_hash_entry;
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
+
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_ESWITCH */
+static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
+#endif
+
+#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
new file mode 100644
index 000000000..52d3989bb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -0,0 +1,730 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/dsfield.h>
+#include "en.h"
+#include "ipoib/ipoib.h"
+#include "en_accel/en_accel.h"
+#include "lib/clock.h"
+
+#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
+
+#ifndef CONFIG_MLX5_EN_TLS
+#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+#else
+/* TLS offload requires MLX5E_SQ_STOP_ROOM to have
+ * enough room for a resync SKB, a normal SKB and a NOP
+ */
+#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+#endif
+
+static inline void mlx5e_tx_dma_unmap(struct device *pdev,
+ struct mlx5e_sq_dma *dma)
+{
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+ }
+}
+
+static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+ return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq,
+ dma_addr_t addr,
+ u32 size,
+ enum mlx5e_dma_map_type map_type)
+{
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+ dma->addr = addr;
+ dma->size = size;
+ dma->type = map_type;
+}
+
+static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
+{
+ int i;
+
+ for (i = 0; i < num_dma; i++) {
+ struct mlx5e_sq_dma *last_pushed_dma =
+ mlx5e_dma_get(sq, --sq->dma_fifo_pc);
+
+ mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
+ }
+}
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev,
+ select_queue_fallback_t fallback)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int channel_ix = fallback(dev, skb, NULL);
+ u16 num_channels;
+ int up = 0;
+
+ if (!netdev_get_num_tc(dev))
+ return channel_ix;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
+ up = mlx5e_get_dscp_up(priv, skb);
+ else
+#endif
+ if (skb_vlan_tag_present(skb))
+ up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+
+ /* channel_ix can be larger than num_channels since
+ * dev->num_real_tx_queues = num_channels * num_tc
+ */
+ num_channels = priv->channels.params.num_channels;
+ if (channel_ix >= num_channels)
+ channel_ix = reciprocal_scale(channel_ix, num_channels);
+
+ return priv->channel_tc2txq[channel_ix][up];
+}
+
+static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
+{
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+
+ return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
+}
+
+static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
+{
+ struct flow_keys keys;
+
+ if (skb_transport_header_was_set(skb))
+ return skb_transport_offset(skb);
+ else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
+ return keys.control.thoff;
+ else
+ return mlx5e_skb_l2_header_offset(skb);
+}
+
+static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
+ struct sk_buff *skb)
+{
+ u16 hlen;
+
+ switch (mode) {
+ case MLX5_INLINE_MODE_NONE:
+ return 0;
+ case MLX5_INLINE_MODE_TCP_UDP:
+ hlen = eth_get_headlen(skb->data, skb_headlen(skb));
+ if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
+ hlen += VLAN_HLEN;
+ break;
+ case MLX5_INLINE_MODE_IP:
+ /* When transport header is set to zero, it means no transport
+ * header. When transport header is set to 0xff's, it means
+ * transport header wasn't set.
+ */
+ if (skb_transport_offset(skb)) {
+ hlen = mlx5e_skb_l3_header_offset(skb);
+ break;
+ }
+ /* fall through */
+ case MLX5_INLINE_MODE_L2:
+ default:
+ hlen = mlx5e_skb_l2_header_offset(skb);
+ }
+ return min_t(u16, hlen, skb_headlen(skb));
+}
+
+static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
+{
+ struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
+ int cpy1_sz = 2 * ETH_ALEN;
+ int cpy2_sz = ihs - cpy1_sz;
+
+ memcpy(vhdr, skb->data, cpy1_sz);
+ vhdr->h_vlan_proto = skb->vlan_proto;
+ vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
+ memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
+}
+
+static inline void
+mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+{
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ if (skb->encapsulation) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ sq->stats->csum_partial_inner++;
+ } else {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+ }
+ } else
+ sq->stats->csum_none++;
+}
+
+static inline u16
+mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ u16 ihs;
+
+ if (skb->encapsulation) {
+ ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
+ stats->tso_inner_packets++;
+ stats->tso_inner_bytes += skb->len - ihs;
+ } else {
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
+ else
+ ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ stats->tso_packets++;
+ stats->tso_bytes += skb->len - ihs;
+ }
+
+ return ihs;
+}
+
+static inline int
+mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ unsigned char *skb_data, u16 headlen,
+ struct mlx5_wqe_data_seg *dseg)
+{
+ dma_addr_t dma_addr = 0;
+ u8 num_dma = 0;
+ int i;
+
+ if (headlen) {
+ dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ goto dma_unmap_wqe_err;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(headlen);
+
+ mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
+ num_dma++;
+ dseg++;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+ int fsz = skb_frag_size(frag);
+
+ dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ goto dma_unmap_wqe_err;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(fsz);
+
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
+ num_dma++;
+ dseg++;
+ }
+
+ return num_dma;
+
+dma_unmap_wqe_err:
+ mlx5e_dma_unmap_wqe_err(sq, num_dma);
+ return -ENOMEM;
+}
+
+static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
+ struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+ edge_wi = wi + nnops;
+
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->skb = NULL;
+ wi->num_wqebbs = 1;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += nnops;
+}
+
+static inline void
+mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
+ struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ wi->num_bytes = num_bytes;
+ wi->num_dma = num_dma;
+ wi->num_wqebbs = num_wqebbs;
+ wi->skb = skb;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+ netdev_tx_sent_queue(sq->txq, num_bytes);
+
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+ sq->pc += wi->num_wqebbs;
+ if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+
+ if (!skb->xmit_more || netif_xmit_stopped(sq->txq))
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
+}
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe *wqe, u16 pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe_info *wi;
+
+ struct mlx5e_sq_stats *stats = sq->stats;
+ u16 headlen, ihs, contig_wqebbs_room;
+ u16 ds_cnt, ds_cnt_inl = 0;
+ u8 num_wqebbs, opcode;
+ u32 num_bytes;
+ int num_dma;
+ __be16 mss;
+
+ /* Calc ihs and ds cnt, no writes to wqe yet */
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ if (skb_is_gso(skb)) {
+ opcode = MLX5_OPCODE_LSO;
+ mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ ihs = mlx5e_tx_get_gso_ihs(sq, skb);
+ num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ opcode = MLX5_OPCODE_SEND;
+ mss = 0;
+ ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+ num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+ stats->packets++;
+ }
+
+ stats->bytes += num_bytes;
+ stats->xmit_more += skb->xmit_more;
+
+ headlen = skb->len - ihs - skb->data_len;
+ ds_cnt += !!headlen;
+ ds_cnt += skb_shinfo(skb)->nr_frags;
+
+ if (ihs) {
+ ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN;
+
+ ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
+#endif
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+#ifdef CONFIG_MLX5_EN_IPSEC
+ wqe->eth = cur_eth;
+#endif
+ }
+
+ /* fill wqe */
+ wi = &sq->db.wqe_info[pi];
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+
+ eseg->mss = mss;
+
+ if (ihs) {
+ eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ if (skb_vlan_tag_present(skb)) {
+ ihs -= VLAN_HLEN;
+ mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs);
+ stats->added_vlan_packets++;
+ } else {
+ memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ }
+ dseg += ds_cnt_inl;
+ } else if (skb_vlan_tag_present(skb)) {
+ eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
+ if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
+ eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
+ eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
+ stats->added_vlan_packets++;
+ }
+
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ if (unlikely(num_dma < 0))
+ goto err_drop;
+
+ mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
+ num_dma, wi, cseg);
+
+ return NETDEV_TX_OK;
+
+err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
+
+ return NETDEV_TX_OK;
+}
+
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_tx_wqe *wqe;
+ struct mlx5e_txqsq *sq;
+ u16 pi;
+
+ sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+ mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+
+ /* might send skbs and update wqe and pi */
+ skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
+ if (unlikely(!skb))
+ return NETDEV_TX_OK;
+
+ return mlx5e_sq_xmit(sq, skb, wqe, pi);
+}
+
+static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
+ struct mlx5_err_cqe *err_cqe)
+{
+ struct mlx5_cqwq *wq = &sq->cq.wq;
+ u32 ci;
+
+ ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+
+ netdev_err(sq->channel->netdev,
+ "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
+ err_cqe->vendor_err_synd);
+ mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
+}
+
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
+{
+ struct mlx5e_sq_stats *stats;
+ struct mlx5e_txqsq *sq;
+ struct mlx5_cqe64 *cqe;
+ u32 dma_fifo_cc;
+ u32 nbytes;
+ u16 npkts;
+ u16 sqcc;
+ int i;
+
+ sq = container_of(cq, struct mlx5e_txqsq, cq);
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return false;
+
+ stats = sq->stats;
+
+ npkts = 0;
+ nbytes = 0;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ /* avoid dirtying sq cache line every cqe */
+ dma_fifo_cc = sq->dma_fifo_cc;
+
+ i = 0;
+ do {
+ u16 wqe_counter;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+ if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
+ &sq->state)) {
+ mlx5e_dump_error_cqe(sq,
+ (struct mlx5_err_cqe *)cqe);
+ queue_work(cq->channel->priv->wq,
+ &sq->recover.recover_work);
+ }
+ stats->cqe_err++;
+ }
+
+ do {
+ struct mlx5e_tx_wqe_info *wi;
+ struct sk_buff *skb;
+ u16 ci;
+ int j;
+
+ last_wqe = (sqcc == wqe_counter);
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+ skb = wi->skb;
+
+ if (unlikely(!skb)) { /* nop */
+ sqcc++;
+ continue;
+ }
+
+ if (unlikely(skb_shinfo(skb)->tx_flags &
+ SKBTX_HW_TSTAMP)) {
+ struct skb_shared_hwtstamps hwts = {};
+
+ hwts.hwtstamp =
+ mlx5_timecounter_cyc2time(sq->clock,
+ get_cqe_ts(cqe));
+ skb_tstamp_tx(skb, &hwts);
+ }
+
+ for (j = 0; j < wi->num_dma; j++) {
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, dma_fifo_cc++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+
+ npkts++;
+ nbytes += wi->num_bytes;
+ sqcc += wi->num_wqebbs;
+ napi_consume_skb(skb, napi_budget);
+ } while (!last_wqe);
+
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ stats->cqes += i;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->dma_fifo_cc = dma_fifo_cc;
+ sq->cc = sqcc;
+
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+
+ if (netif_tx_queue_stopped(sq->txq) &&
+ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5E_SQ_STOP_ROOM) &&
+ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+ netif_tx_wake_queue(sq->txq);
+ stats->wake++;
+ }
+
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ u32 nbytes = 0;
+ u16 ci, npkts = 0;
+ struct sk_buff *skb;
+ int i;
+
+ while (sq->cc != sq->pc) {
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ wi = &sq->db.wqe_info[ci];
+ skb = wi->skb;
+
+ if (!skb) { /* nop */
+ sq->cc++;
+ continue;
+ }
+
+ for (i = 0; i < wi->num_dma; i++) {
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+
+ dev_kfree_skb_any(skb);
+ npkts++;
+ nbytes += wi->num_bytes;
+ sq->cc += wi->num_wqebbs;
+ }
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+}
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+static inline void
+mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
+ struct mlx5_wqe_datagram_seg *dseg)
+{
+ memcpy(&dseg->av, av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
+}
+
+netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5i_tx_wqe *wqe;
+
+ struct mlx5_wqe_datagram_seg *datagram;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe_info *wi;
+
+ struct mlx5e_sq_stats *stats = sq->stats;
+ u16 headlen, ihs, pi, contig_wqebbs_room;
+ u16 ds_cnt, ds_cnt_inl = 0;
+ u8 num_wqebbs, opcode;
+ u32 num_bytes;
+ int num_dma;
+ __be16 mss;
+
+ /* Calc ihs and ds cnt, no writes to wqe yet */
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ if (skb_is_gso(skb)) {
+ opcode = MLX5_OPCODE_LSO;
+ mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ ihs = mlx5e_tx_get_gso_ihs(sq, skb);
+ num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ opcode = MLX5_OPCODE_SEND;
+ mss = 0;
+ ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+ num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+ stats->packets++;
+ }
+
+ stats->bytes += num_bytes;
+ stats->xmit_more += skb->xmit_more;
+
+ headlen = skb->len - ihs - skb->data_len;
+ ds_cnt += !!headlen;
+ ds_cnt += skb_shinfo(skb)->nr_frags;
+
+ if (ihs) {
+ ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ mlx5i_sq_fetch_wqe(sq, &wqe, pi);
+
+ /* fill wqe */
+ wi = &sq->db.wqe_info[pi];
+ cseg = &wqe->ctrl;
+ datagram = &wqe->datagram;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);
+
+ mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+
+ eseg->mss = mss;
+
+ if (ihs) {
+ memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ dseg += ds_cnt_inl;
+ }
+
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ if (unlikely(num_dma < 0))
+ goto err_drop;
+
+ mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
+ num_dma, wi, cseg);
+
+ return NETDEV_TX_OK;
+
+err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
+
+ return NETDEV_TX_OK;
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
new file mode 100644
index 000000000..85d517360
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/irq.h>
+#include "en.h"
+#include "en/xdp.h"
+
+static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
+{
+ int current_cpu = smp_processor_id();
+ const struct cpumask *aff;
+ struct irq_data *idata;
+
+ idata = irq_desc_get_irq_data(c->irq_desc);
+ aff = irq_data_get_affinity_mask(idata);
+ return cpumask_test_cpu(current_cpu, aff);
+}
+
+static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct net_dim_sample dim_sample;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
+ return;
+
+ net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes,
+ &dim_sample);
+ net_dim(&sq->dim, dim_sample);
+}
+
+static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct net_dim_sample dim_sample;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
+ return;
+
+ net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes,
+ &dim_sample);
+ net_dim(&rq->dim, dim_sample);
+}
+
+int mlx5e_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
+ napi);
+ struct mlx5e_ch_stats *ch_stats = c->stats;
+ bool busy = false;
+ int work_done = 0;
+ int i;
+
+ ch_stats->poll++;
+
+ for (i = 0; i < c->num_tc; i++)
+ busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
+
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+
+ if (c->xdp)
+ busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
+
+ if (likely(budget)) { /* budget=0 means: don't poll rx rings */
+ work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
+ busy |= work_done == budget;
+ }
+
+ busy |= c->rq.post_wqes(&c->rq);
+
+ if (busy) {
+ if (likely(mlx5e_channel_no_affinity_change(c)))
+ return budget;
+ ch_stats->aff_change++;
+ if (budget && work_done == budget)
+ work_done--;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ return work_done;
+
+ ch_stats->arm++;
+
+ for (i = 0; i < c->num_tc; i++) {
+ mlx5e_handle_tx_dim(&c->sq[i]);
+ mlx5e_cq_arm(&c->sq[i].cq);
+ }
+
+ mlx5e_handle_rx_dim(&c->rq);
+
+ mlx5e_cq_arm(&c->rq.cq);
+ mlx5e_cq_arm(&c->icosq.cq);
+ mlx5e_cq_arm(&c->xdpsq.cq);
+
+ return work_done;
+}
+
+void mlx5e_completion_event(struct mlx5_core_cq *mcq)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+
+ napi_schedule(cq->napi);
+ cq->event_ctr++;
+ cq->channel->stats->events++;
+}
+
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+ struct mlx5e_channel *c = cq->channel;
+ struct net_device *netdev = c->netdev;
+
+ netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n",
+ __func__, mcq->cqn, event);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
new file mode 100644
index 000000000..c1e1a16a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -0,0 +1,991 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+#include "mlx5_core.h"
+#include "fpga/core.h"
+#include "eswitch.h"
+#include "lib/clock.h"
+#include "diag/fw_tracer.h"
+
+enum {
+ MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
+ MLX5_EQE_OWNER_INIT_VAL = 0x1,
+};
+
+enum {
+ MLX5_EQ_STATE_ARMED = 0x9,
+ MLX5_EQ_STATE_FIRED = 0xa,
+ MLX5_EQ_STATE_ALWAYS_ARMED = 0xb,
+};
+
+enum {
+ MLX5_NUM_SPARE_EQE = 0x80,
+ MLX5_NUM_ASYNC_EQE = 0x1000,
+ MLX5_NUM_CMD_EQE = 32,
+ MLX5_NUM_PF_DRAIN = 64,
+};
+
+enum {
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
+};
+
+#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
+ (1ull << MLX5_EVENT_TYPE_COMM_EST) | \
+ (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \
+ (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
+
+struct map_eq_in {
+ u64 mask;
+ u32 reserved;
+ u32 unmap_eqn;
+};
+
+struct cre_des_eq {
+ u8 reserved[15];
+ u8 eqn;
+};
+
+static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0};
+
+ MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+ MLX5_SET(destroy_eq_in, in, eq_number, eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+ return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+ return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static const char *eqe_type_str(u8 type)
+{
+ switch (type) {
+ case MLX5_EVENT_TYPE_COMP:
+ return "MLX5_EVENT_TYPE_COMP";
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ return "MLX5_EVENT_TYPE_PATH_MIG";
+ case MLX5_EVENT_TYPE_COMM_EST:
+ return "MLX5_EVENT_TYPE_COMM_EST";
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ return "MLX5_EVENT_TYPE_SQ_DRAINED";
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return "MLX5_EVENT_TYPE_CQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+ return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return "MLX5_EVENT_TYPE_PORT_CHANGE";
+ case MLX5_EVENT_TYPE_GPIO_EVENT:
+ return "MLX5_EVENT_TYPE_GPIO_EVENT";
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+ case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+ return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+ case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+ return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+ case MLX5_EVENT_TYPE_STALL_EVENT:
+ return "MLX5_EVENT_TYPE_STALL_EVENT";
+ case MLX5_EVENT_TYPE_CMD:
+ return "MLX5_EVENT_TYPE_CMD";
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+ case MLX5_EVENT_TYPE_PAGE_FAULT:
+ return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ default:
+ return "Unrecognized event";
+ }
+}
+
+static enum mlx5_dev_event port_subtype_event(u8 subtype)
+{
+ switch (subtype) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ return MLX5_DEV_EVENT_PORT_DOWN;
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ return MLX5_DEV_EVENT_PORT_UP;
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+ return MLX5_DEV_EVENT_PORT_INITIALIZED;
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ return MLX5_DEV_EVENT_LID_CHANGE;
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ return MLX5_DEV_EVENT_PKEY_CHANGE;
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ return MLX5_DEV_EVENT_GUID_CHANGE;
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ return MLX5_DEV_EVENT_CLIENT_REREG;
+ }
+ return -1;
+}
+
+static void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static void eqe_pf_action(struct work_struct *work)
+{
+ struct mlx5_pagefault *pfault = container_of(work,
+ struct mlx5_pagefault,
+ work);
+ struct mlx5_eq *eq = pfault->eq;
+
+ mlx5_core_page_fault(eq->dev, pfault);
+ mempool_free(pfault, eq->pf_ctx.pool);
+}
+
+static void eq_pf_process(struct mlx5_eq *eq)
+{
+ struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eqe_page_fault *pf_eqe;
+ struct mlx5_pagefault *pfault;
+ struct mlx5_eqe *eqe;
+ int set_ci = 0;
+
+ while ((eqe = next_eqe_sw(eq))) {
+ pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
+ if (!pfault) {
+ schedule_work(&eq->pf_ctx.work);
+ break;
+ }
+
+ dma_rmb();
+ pf_eqe = &eqe->data.page_fault;
+ pfault->event_subtype = eqe->sub_type;
+ pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
+
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
+ eqe->sub_type, pfault->bytes_committed);
+
+ switch (eqe->sub_type) {
+ case MLX5_PFAULT_SUBTYPE_RDMA:
+ /* RDMA based event */
+ pfault->type =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+ pfault->token =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) &
+ MLX5_24BIT_MASK;
+ pfault->rdma.r_key =
+ be32_to_cpu(pf_eqe->rdma.r_key);
+ pfault->rdma.packet_size =
+ be16_to_cpu(pf_eqe->rdma.packet_length);
+ pfault->rdma.rdma_op_len =
+ be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+ pfault->rdma.rdma_va =
+ be64_to_cpu(pf_eqe->rdma.rdma_va);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+ pfault->rdma.rdma_op_len,
+ pfault->rdma.rdma_va);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_WQE:
+ /* WQE based event */
+ pfault->type =
+ (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
+ pfault->token =
+ be32_to_cpu(pf_eqe->wqe.token);
+ pfault->wqe.wq_num =
+ be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+ MLX5_24BIT_MASK;
+ pfault->wqe.wqe_index =
+ be16_to_cpu(pf_eqe->wqe.wqe_index);
+ pfault->wqe.packet_size =
+ be16_to_cpu(pf_eqe->wqe.packet_length);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ pfault->type, pfault->token,
+ pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ default:
+ mlx5_core_warn(dev,
+ "Unsupported page fault event sub-type: 0x%02hhx\n",
+ eqe->sub_type);
+ /* Unsupported page faults should still be
+ * resolved by the page fault handler
+ */
+ }
+
+ pfault->eq = eq;
+ INIT_WORK(&pfault->work, eqe_pf_action);
+ queue_work(eq->pf_ctx.wq, &pfault->work);
+
+ ++eq->cons_index;
+ ++set_ci;
+
+ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
+ eq_update_ci(eq, 0);
+ set_ci = 0;
+ }
+ }
+
+ eq_update_ci(eq, 1);
+}
+
+static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
+{
+ struct mlx5_eq *eq = eq_ptr;
+ unsigned long flags;
+
+ if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
+ eq_pf_process(eq);
+ spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
+ } else {
+ schedule_work(&eq->pf_ctx.work);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Chip workaround.
+ */
+static void mempool_refill(mempool_t *pool)
+{
+ while (pool->curr_nr < pool->min_nr)
+ mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
+static void eq_pf_action(struct work_struct *work)
+{
+ struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
+
+ mempool_refill(eq->pf_ctx.pool);
+
+ spin_lock_irq(&eq->pf_ctx.lock);
+ eq_pf_process(eq);
+ spin_unlock_irq(&eq->pf_ctx.lock);
+}
+
+static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
+{
+ spin_lock_init(&pf_ctx->lock);
+ INIT_WORK(&pf_ctx->work, eq_pf_action);
+
+ pf_ctx->wq = alloc_ordered_workqueue(name,
+ WQ_MEM_RECLAIM);
+ if (!pf_ctx->wq)
+ return -ENOMEM;
+
+ pf_ctx->pool = mempool_create_kmalloc_pool
+ (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
+ if (!pf_ctx->pool)
+ goto err_wq;
+
+ return 0;
+err_wq:
+ destroy_workqueue(pf_ctx->wq);
+ return -ENOMEM;
+}
+
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
+ u32 wq_num, u8 type, int error)
+{
+ u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
+
+ MLX5_SET(page_fault_resume_in, in, opcode,
+ MLX5_CMD_OP_PAGE_FAULT_RESUME);
+ MLX5_SET(page_fault_resume_in, in, error, !!error);
+ MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
+ MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+ MLX5_SET(page_fault_resume_in, in, token, token);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
+#endif
+
+static void general_event_handler(struct mlx5_core_dev *dev,
+ struct mlx5_eqe *eqe)
+{
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+ if (dev->event)
+ dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
+ break;
+ default:
+ mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
+ eqe->sub_type);
+ }
+}
+
+static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
+ struct mlx5_eqe *eqe)
+{
+ u64 value_lsb;
+ u64 value_msb;
+
+ value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+ mlx5_core_warn(dev,
+ "High temperature on sensors with bit set %llx %llx",
+ value_msb, value_lsb);
+}
+
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
+
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
+
+ return cq;
+}
+
+static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ ++cq->arm_sn;
+
+ cq->comp(cq);
+
+ mlx5_cq_put(cq);
+}
+
+static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ cq->event(cq, event_type);
+
+ mlx5_cq_put(cq);
+}
+
+static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+{
+ struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eqe *eqe;
+ int set_ci = 0;
+ u32 cqn = -1;
+ u32 rsn;
+ u8 port;
+
+ while ((eqe = next_eqe_sw(eq))) {
+ /*
+ * Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ dma_rmb();
+
+ mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
+ eq->eqn, eqe_type_str(eqe->type));
+ switch (eqe->type) {
+ case MLX5_EVENT_TYPE_COMP:
+ cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+ mlx5_eq_cq_completion(eq, cqn);
+ break;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+ mlx5_rsc_event(dev, rsn, eqe->type);
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
+ eqe_type_str(eqe->type), eqe->type, rsn);
+ mlx5_rsc_event(dev, rsn, eqe->type);
+ break;
+
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
+ eqe_type_str(eqe->type), eqe->type, rsn);
+ mlx5_srq_event(dev, rsn, eqe->type);
+ break;
+
+ case MLX5_EVENT_TYPE_CMD:
+ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+ break;
+
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ port = (eqe->data.port.port >> 4) & 0xf;
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+ if (dev->event)
+ dev->event(dev, port_subtype_event(eqe->sub_type),
+ (unsigned long)port);
+ break;
+ default:
+ mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
+ port, eqe->sub_type);
+ }
+ break;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+ cqn, eqe->data.cq_err.syndrome);
+ mlx5_eq_cq_event(eq, cqn, eqe->type);
+ break;
+
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ {
+ u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+ s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+ func_id, npages);
+ mlx5_core_req_pages_handler(dev, func_id, npages);
+ }
+ break;
+
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ mlx5_port_module_event(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ mlx5_pps_event(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
+ break;
+
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ mlx5_temp_warning_event(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ general_event_handler(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ mlx5_fw_tracer_event(dev, eqe);
+ break;
+
+ default:
+ mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
+ eqe->type, eq->eqn);
+ break;
+ }
+
+ ++eq->cons_index;
+ ++set_ci;
+
+ /* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MLX5_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
+ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
+ eq_update_ci(eq, 0);
+ set_ci = 0;
+ }
+ }
+
+ eq_update_ci(eq, 1);
+
+ if (cqn != -1)
+ tasklet_schedule(&eq->tasklet_ctx.task);
+
+ return IRQ_HANDLED;
+}
+
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them. It is not recommended to use it, unless this is the last
+ * resort.
+ */
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
+{
+ u32 count_eqe;
+
+ disable_irq(eq->irqn);
+ count_eqe = eq->cons_index;
+ mlx5_eq_int(eq->irqn, eq);
+ count_eqe = eq->cons_index - count_eqe;
+ enable_irq(eq->irqn);
+
+ return count_eqe;
+}
+
+static void init_eq_buf(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe;
+ int i;
+
+ for (i = 0; i < eq->nent; i++) {
+ eqe = get_eqe(eq, i);
+ eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
+ }
+}
+
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type)
+{
+ struct mlx5_cq_table *cq_table = &eq->cq_table;
+ u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
+ struct mlx5_priv *priv = &dev->priv;
+ irq_handler_t handler;
+ __be64 *pas;
+ void *eqc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ /* Init CQ table */
+ memset(cq_table, 0, sizeof(*cq_table));
+ spin_lock_init(&cq_table->lock);
+ INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
+ eq->type = type;
+ eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+ eq->cons_index = 0;
+ err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
+ if (err)
+ return err;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (type == MLX5_EQ_TYPE_PF)
+ handler = mlx5_eq_pf_int;
+ else
+#endif
+ handler = mlx5_eq_int;
+
+ init_eq_buf(eq);
+
+ inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
+ MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_buf;
+ }
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
+ mlx5_fill_page_array(&eq->buf, pas);
+
+ MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
+ MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+
+ eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
+ MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
+ MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
+ MLX5_SET(eqc, eqc, intr, vecidx);
+ MLX5_SET(eqc, eqc, log_page_size,
+ eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err)
+ goto err_in;
+
+ snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+ name, pci_name(dev->pdev));
+
+ eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
+ eq->irqn = pci_irq_vector(dev->pdev, vecidx);
+ eq->dev = dev;
+ eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
+ err = request_irq(eq->irqn, handler, 0,
+ priv->irq_info[vecidx].name, eq);
+ if (err)
+ goto err_eq;
+
+ err = mlx5_debug_eq_add(dev, eq);
+ if (err)
+ goto err_irq;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (type == MLX5_EQ_TYPE_PF) {
+ err = init_pf_ctx(&eq->pf_ctx, name);
+ if (err)
+ goto err_irq;
+ } else
+#endif
+ {
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+ }
+
+ /* EQs are created in ARMED state
+ */
+ eq_update_ci(eq, 1);
+
+ kvfree(in);
+ return 0;
+
+err_irq:
+ free_irq(eq->irqn, eq);
+
+err_eq:
+ mlx5_cmd_destroy_eq(dev, eq->eqn);
+
+err_in:
+ kvfree(in);
+
+err_buf:
+ mlx5_buf_free(dev, &eq->buf);
+ return err;
+}
+
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ mlx5_debug_eq_remove(dev, eq);
+ free_irq(eq->irqn, eq);
+ err = mlx5_cmd_destroy_eq(dev, eq->eqn);
+ if (err)
+ mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
+ eq->eqn);
+ synchronize_irq(eq->irqn);
+
+ if (eq->type == MLX5_EQ_TYPE_COMP) {
+ tasklet_disable(&eq->tasklet_ctx.task);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ } else if (eq->type == MLX5_EQ_TYPE_PF) {
+ cancel_work_sync(&eq->pf_ctx.work);
+ destroy_workqueue(eq->pf_ctx.wq);
+ mempool_destroy(eq->pf_ctx.pool);
+#endif
+ }
+ mlx5_buf_free(dev, &eq->buf);
+
+ return err;
+}
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, cq->cqn, cq);
+ spin_unlock_irq(&table->lock);
+
+ return err;
+}
+
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *tmp;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, cq->cqn);
+ spin_unlock_irq(&table->lock);
+
+ if (!tmp) {
+ mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
+ return -ENOENT;
+ }
+
+ if (tmp != cq) {
+ mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5_eq_init(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ spin_lock_init(&dev->priv.eq_table.lock);
+
+ err = mlx5_eq_debugfs_init(dev);
+
+ return err;
+}
+
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+{
+ mlx5_eq_debugfs_cleanup(dev);
+}
+
+int mlx5_start_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+ int err;
+
+ if (MLX5_VPORT_MANAGER(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
+
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
+ MLX5_CAP_GEN(dev, general_notification_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
+
+ if (MLX5_CAP_GEN(dev, port_module_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
+ else
+ mlx5_core_dbg(dev, "port_module_event is not set\n");
+
+ if (MLX5_PPS_CAP(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
+
+ if (MLX5_CAP_GEN(dev, fpga))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
+ (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
+
+ if (MLX5_CAP_GEN(dev, temp_warn_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
+
+ if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
+
+ err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
+ MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
+ "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
+ return err;
+ }
+
+ mlx5_cmd_use_events(dev);
+
+ err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
+ MLX5_NUM_ASYNC_EQE, async_event_mask,
+ "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+ goto err1;
+ }
+
+ err = mlx5_create_map_eq(dev, &table->pages_eq,
+ MLX5_EQ_VEC_PAGES,
+ /* TODO: sriov max_vf + */ 1,
+ 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
+ MLX5_EQ_TYPE_ASYNC);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+ goto err2;
+ }
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_create_map_eq(dev, &table->pfault_eq,
+ MLX5_EQ_VEC_PFAULT,
+ MLX5_NUM_ASYNC_EQE,
+ 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+ "mlx5_page_fault_eq",
+ MLX5_EQ_TYPE_PF);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
+ err);
+ goto err3;
+ }
+ }
+
+ return err;
+err3:
+ mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+#else
+ return err;
+#endif
+
+err2:
+ mlx5_destroy_unmap_eq(dev, &table->async_eq);
+
+err1:
+ mlx5_cmd_use_polling(dev);
+ mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ return err;
+}
+
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ int err;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+ err);
+ }
+#endif
+
+ err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+ err);
+
+ err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+ err);
+ mlx5_cmd_use_polling(dev);
+
+ err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+ err);
+}
+
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (dev->rmap) {
+ free_irq_cpu_rmap(dev->rmap);
+ dev->rmap = NULL;
+ }
+#endif
+ list_for_each_entry(eq, &table->comp_eqs_list, list)
+ free_irq(eq->irqn, eq);
+
+ free_irq(table->pages_eq.irqn, &table->pages_eq);
+ free_irq(table->async_eq.irqn, &table->async_eq);
+ free_irq(table->cmd_eq.irqn, &table->cmd_eq);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg))
+ free_irq(table->pfault_eq.irqn, &table->pfault_eq);
+#endif
+ pci_free_irq_vectors(dev->pdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
new file mode 100644
index 000000000..2190daace
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -0,0 +1,2219 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "fs_core.h"
+
+#define UPLINK_VPORT 0xFFFF
+
+enum {
+ MLX5_ACTION_NONE = 0,
+ MLX5_ACTION_ADD = 1,
+ MLX5_ACTION_DEL = 2,
+};
+
+/* Vport UC/MC hash node */
+struct vport_addr {
+ struct l2addr_node node;
+ u8 action;
+ u32 vport;
+ struct mlx5_flow_handle *flow_rule;
+ bool mpfs; /* UC MAC was added to MPFs */
+ /* A flag indicating that mac was added due to mc promiscuous vport */
+ bool mc_promisc;
+};
+
+enum {
+ UC_ADDR_CHANGE = BIT(0),
+ MC_ADDR_CHANGE = BIT(1),
+ PROMISC_CHANGE = BIT(3),
+};
+
+/* Vport context events */
+#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \
+ MC_ADDR_CHANGE | \
+ PROMISC_CHANGE)
+
+static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
+ u32 events_mask)
+{
+ int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+ int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
+ void *nic_vport_ctx;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1);
+
+ if (events_mask & UC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_uc_address_change, 1);
+ if (events_mask & MC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_mc_address_change, 1);
+ if (events_mask & PROMISC_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_promisc_change, 1);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/* E-Switch vport context HW commands */
+static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+ void *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
+
+ MLX5_SET(modify_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
+ MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+ u16 vlan, u8 qos, u8 set_flags)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
+
+ if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
+ !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
+ return -EOPNOTSUPP;
+
+ esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n",
+ vport, vlan, qos, set_flags);
+
+ if (set_flags & SET_VLAN_STRIP)
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_strip, 1);
+
+ if (set_flags & SET_VLAN_INSERT) {
+ /* insert only if no vlan in packet */
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_insert, 1);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_pcp, qos);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_id, vlan);
+ }
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_strip, 1);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_insert, 1);
+
+ return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
+}
+
+/* E-Switch FDB */
+static struct mlx5_flow_handle *
+__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
+ u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
+{
+ int match_header = (is_zero_ether_addr(mac_c) ? 0 :
+ MLX5_MATCH_OUTER_HEADERS);
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_spec *spec;
+ void *mv_misc = NULL;
+ void *mc_misc = NULL;
+ u8 *dmac_v = NULL;
+ u8 *dmac_c = NULL;
+
+ if (rx_rule)
+ match_header |= MLX5_MATCH_MISC_PARAMETERS;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dmac_47_16);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dmac_47_16);
+
+ if (match_header & MLX5_MATCH_OUTER_HEADERS) {
+ ether_addr_copy(dmac_v, mac_v);
+ ether_addr_copy(dmac_c, mac_c);
+ }
+
+ if (match_header & MLX5_MATCH_MISC_PARAMETERS) {
+ mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT);
+ MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
+ }
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = vport;
+
+ esw_debug(esw->dev,
+ "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
+ dmac_v, dmac_c, vport);
+ spec->match_criteria_enable = match_header;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule =
+ mlx5_add_flow_rules(esw->fdb_table.legacy.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev,
+ "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+ dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
+ flow_rule = NULL;
+ }
+
+ kvfree(spec);
+ return flow_rule;
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+{
+ u8 mac_c[ETH_ALEN];
+
+ eth_broadcast_addr(mac_c);
+ return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac);
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
+{
+ u8 mac_c[ETH_ALEN];
+ u8 mac_v[ETH_ALEN];
+
+ eth_zero_addr(mac_c);
+ eth_zero_addr(mac_v);
+ mac_c[0] = 0x01;
+ mac_v[0] = 0x01;
+ return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v);
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
+{
+ u8 mac_c[ETH_ALEN];
+ u8 mac_v[ETH_ALEN];
+
+ eth_zero_addr(mac_c);
+ eth_zero_addr(mac_v);
+ return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
+}
+
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int table_size;
+ u32 *flow_group_in;
+ u8 *dmac;
+ int err = 0;
+
+ esw_debug(dev, "Create FDB log_max_size(%d)\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ ft_attr.max_fte = table_size;
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create FDB Table err %d\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.fdb = fdb;
+
+ /* Addresses group : Full match unicast/multicast addresses */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ /* Preserve 2 entries for allmulti and promisc rules*/
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
+ eth_broadcast_addr(dmac);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.addr_grp = g;
+
+ /* Allmulti group : One rule that forwards any mcast traffic */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2);
+ eth_zero_addr(dmac);
+ dmac[0] = 0x01;
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.allmulti_grp = g;
+
+ /* Promiscuous group :
+ * One rule that forward all unmatched traffic from previous groups
+ */
+ eth_zero_addr(dmac);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.promisc_grp = g;
+
+out:
+ if (err) {
+ if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) {
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+ esw->fdb_table.legacy.allmulti_grp = NULL;
+ }
+ if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) {
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+ esw->fdb_table.legacy.addr_grp = NULL;
+ }
+ if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) {
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+ esw->fdb_table.legacy.fdb = NULL;
+ }
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ if (!esw->fdb_table.legacy.fdb)
+ return;
+
+ esw_debug(esw->dev, "Destroy FDB Table\n");
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+ esw->fdb_table.legacy.fdb = NULL;
+ esw->fdb_table.legacy.addr_grp = NULL;
+ esw->fdb_table.legacy.allmulti_grp = NULL;
+ esw->fdb_table.legacy.promisc_grp = NULL;
+}
+
+/* E-Switch vport UC/MC lists management */
+typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
+ struct vport_addr *vaddr);
+
+static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+ int err;
+
+ /* Skip mlx5_mpfs_add_mac for PFs,
+ * it is already done by the PF netdev in mlx5e_execute_l2_action
+ */
+ if (!vport)
+ goto fdb_add;
+
+ err = mlx5_mpfs_add_mac(esw->dev, mac);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
+ mac, vport, err);
+ return err;
+ }
+ vaddr->mpfs = true;
+
+fdb_add:
+ /* SRIOV is enabled: Forward UC MAC to vport */
+ if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY)
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+
+ esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
+ vport, mac, vaddr->flow_rule);
+
+ return 0;
+}
+
+static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+ int err = 0;
+
+ /* Skip mlx5_mpfs_del_mac for PFs,
+ * it is already done by the PF netdev in mlx5e_execute_l2_action
+ */
+ if (!vport || !vaddr->mpfs)
+ goto fdb_del;
+
+ err = mlx5_mpfs_del_mac(esw->dev, mac);
+ if (err)
+ esw_warn(esw->dev,
+ "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n",
+ mac, vport, err);
+ vaddr->mpfs = false;
+
+fdb_del:
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rules(vaddr->flow_rule);
+ vaddr->flow_rule = NULL;
+
+ return 0;
+}
+
+static void update_allmulti_vports(struct mlx5_eswitch *esw,
+ struct vport_addr *vaddr,
+ struct esw_mc_addr *esw_mc)
+{
+ u8 *mac = vaddr->node.addr;
+ u32 vport_idx = 0;
+
+ for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) {
+ struct mlx5_vport *vport = &esw->vports[vport_idx];
+ struct hlist_head *vport_hash = vport->mc_list;
+ struct vport_addr *iter_vaddr =
+ l2addr_hash_find(vport_hash,
+ mac,
+ struct vport_addr);
+ if (IS_ERR_OR_NULL(vport->allmulti_rule) ||
+ vaddr->vport == vport_idx)
+ continue;
+ switch (vaddr->action) {
+ case MLX5_ACTION_ADD:
+ if (iter_vaddr)
+ continue;
+ iter_vaddr = l2addr_hash_add(vport_hash, mac,
+ struct vport_addr,
+ GFP_KERNEL);
+ if (!iter_vaddr) {
+ esw_warn(esw->dev,
+ "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n",
+ mac, vport_idx);
+ continue;
+ }
+ iter_vaddr->vport = vport_idx;
+ iter_vaddr->flow_rule =
+ esw_fdb_set_vport_rule(esw,
+ mac,
+ vport_idx);
+ iter_vaddr->mc_promisc = true;
+ break;
+ case MLX5_ACTION_DEL:
+ if (!iter_vaddr)
+ continue;
+ mlx5_del_flow_rules(iter_vaddr->flow_rule);
+ l2addr_hash_del(iter_vaddr);
+ break;
+ }
+ }
+}
+
+static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+
+ if (!esw->fdb_table.legacy.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (esw_mc)
+ goto add;
+
+ esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL);
+ if (!esw_mc)
+ return -ENOMEM;
+
+ esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
+ esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+
+ /* Add this multicast mac to all the mc promiscuous vports */
+ update_allmulti_vports(esw, vaddr, esw_mc);
+
+add:
+ /* If the multicast mac is added as a result of mc promiscuous vport,
+ * don't increment the multicast ref count
+ */
+ if (!vaddr->mc_promisc)
+ esw_mc->refcnt++;
+
+ /* Forward MC MAC to vport */
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+ esw_debug(esw->dev,
+ "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule,
+ esw_mc->refcnt, esw_mc->uplink_rule);
+ return 0;
+}
+
+static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+
+ if (!esw->fdb_table.legacy.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (!esw_mc) {
+ esw_warn(esw->dev,
+ "Failed to find eswitch MC addr for MAC(%pM) vport(%d)",
+ mac, vport);
+ return -EINVAL;
+ }
+ esw_debug(esw->dev,
+ "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule, esw_mc->refcnt,
+ esw_mc->uplink_rule);
+
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rules(vaddr->flow_rule);
+ vaddr->flow_rule = NULL;
+
+ /* If the multicast mac is added as a result of mc promiscuous vport,
+ * don't decrement the multicast ref count.
+ */
+ if (vaddr->mc_promisc || (--esw_mc->refcnt > 0))
+ return 0;
+
+ /* Remove this multicast mac from all the mc promiscuous vports */
+ update_allmulti_vports(esw, vaddr, esw_mc);
+
+ if (esw_mc->uplink_rule)
+ mlx5_del_flow_rules(esw_mc->uplink_rule);
+
+ l2addr_hash_del(esw_mc);
+ return 0;
+}
+
+/* Apply vport UC/MC list to HW l2 table and FDB table */
+static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
+ u32 vport_num, int list_type)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ vport_addr_action vport_addr_add;
+ vport_addr_action vport_addr_del;
+ struct vport_addr *addr;
+ struct l2addr_node *node;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int hi;
+
+ vport_addr_add = is_uc ? esw_add_uc_addr :
+ esw_add_mc_addr;
+ vport_addr_del = is_uc ? esw_del_uc_addr :
+ esw_del_mc_addr;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ switch (addr->action) {
+ case MLX5_ACTION_ADD:
+ vport_addr_add(esw, addr);
+ addr->action = MLX5_ACTION_NONE;
+ break;
+ case MLX5_ACTION_DEL:
+ vport_addr_del(esw, addr);
+ l2addr_hash_del(addr);
+ break;
+ }
+ }
+}
+
+/* Sync vport UC/MC list from vport context */
+static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
+ u32 vport_num, int list_type)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ u8 (*mac_list)[ETH_ALEN];
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int size;
+ int err;
+ int hi;
+ int i;
+
+ size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) :
+ MLX5_MAX_MC_PER_VPORT(esw->dev);
+
+ mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!mac_list)
+ return;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ addr->action = MLX5_ACTION_DEL;
+ }
+
+ if (!vport->enabled)
+ goto out;
+
+ err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type,
+ mac_list, &size);
+ if (err)
+ goto out;
+ esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n",
+ vport_num, is_uc ? "UC" : "MC", size);
+
+ for (i = 0; i < size; i++) {
+ if (is_uc && !is_valid_ether_addr(mac_list[i]))
+ continue;
+
+ if (!is_uc && !is_multicast_ether_addr(mac_list[i]))
+ continue;
+
+ addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr);
+ if (addr) {
+ addr->action = MLX5_ACTION_NONE;
+ /* If this mac was previously added because of allmulti
+ * promiscuous rx mode, its now converted to be original
+ * vport mac.
+ */
+ if (addr->mc_promisc) {
+ struct esw_mc_addr *esw_mc =
+ l2addr_hash_find(esw->mc_table,
+ mac_list[i],
+ struct esw_mc_addr);
+ if (!esw_mc) {
+ esw_warn(esw->dev,
+ "Failed to MAC(%pM) in mcast DB\n",
+ mac_list[i]);
+ continue;
+ }
+ esw_mc->refcnt++;
+ addr->mc_promisc = false;
+ }
+ continue;
+ }
+
+ addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ esw_warn(esw->dev,
+ "Failed to add MAC(%pM) to vport[%d] DB\n",
+ mac_list[i], vport_num);
+ continue;
+ }
+ addr->vport = vport_num;
+ addr->action = MLX5_ACTION_ADD;
+ }
+out:
+ kfree(mac_list);
+}
+
+/* Sync vport UC/MC list from vport context
+ * Must be called after esw_update_vport_addr_list
+ */
+static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int hi;
+
+ hash = vport->mc_list;
+
+ for_each_l2hash_node(node, tmp, esw->mc_table, hi) {
+ u8 *mac = node->addr;
+
+ addr = l2addr_hash_find(hash, mac, struct vport_addr);
+ if (addr) {
+ if (addr->action == MLX5_ACTION_DEL)
+ addr->action = MLX5_ACTION_NONE;
+ continue;
+ }
+ addr = l2addr_hash_add(hash, mac, struct vport_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ esw_warn(esw->dev,
+ "Failed to add allmulti MAC(%pM) to vport[%d] DB\n",
+ mac, vport_num);
+ continue;
+ }
+ addr->vport = vport_num;
+ addr->action = MLX5_ACTION_ADD;
+ addr->mc_promisc = true;
+ }
+}
+
+/* Apply vport rx mode to HW FDB table */
+static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
+ bool promisc, bool mc_promisc)
+{
+ struct esw_mc_addr *allmulti_addr = &esw->mc_promisc;
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc)
+ goto promisc;
+
+ if (mc_promisc) {
+ vport->allmulti_rule =
+ esw_fdb_set_vport_allmulti_rule(esw, vport_num);
+ if (!allmulti_addr->uplink_rule)
+ allmulti_addr->uplink_rule =
+ esw_fdb_set_vport_allmulti_rule(esw,
+ UPLINK_VPORT);
+ allmulti_addr->refcnt++;
+ } else if (vport->allmulti_rule) {
+ mlx5_del_flow_rules(vport->allmulti_rule);
+ vport->allmulti_rule = NULL;
+
+ if (--allmulti_addr->refcnt > 0)
+ goto promisc;
+
+ if (allmulti_addr->uplink_rule)
+ mlx5_del_flow_rules(allmulti_addr->uplink_rule);
+ allmulti_addr->uplink_rule = NULL;
+ }
+
+promisc:
+ if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc)
+ return;
+
+ if (promisc) {
+ vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw,
+ vport_num);
+ } else if (vport->promisc_rule) {
+ mlx5_del_flow_rules(vport->promisc_rule);
+ vport->promisc_rule = NULL;
+ }
+}
+
+/* Sync vport rx mode from vport context */
+static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ int promisc_all = 0;
+ int promisc_uc = 0;
+ int promisc_mc = 0;
+ int err;
+
+ err = mlx5_query_nic_vport_promisc(esw->dev,
+ vport_num,
+ &promisc_uc,
+ &promisc_mc,
+ &promisc_all);
+ if (err)
+ return;
+ esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
+ vport_num, promisc_all, promisc_mc);
+
+ if (!vport->info.trusted || !vport->enabled) {
+ promisc_uc = 0;
+ promisc_mc = 0;
+ promisc_all = 0;
+ }
+
+ esw_apply_vport_rx_mode(esw, vport_num, promisc_all,
+ (promisc_all || promisc_mc));
+}
+
+static void esw_vport_change_handle_locked(struct mlx5_vport *vport)
+{
+ struct mlx5_core_dev *dev = vport->dev;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ u8 mac[ETH_ALEN];
+
+ mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+ esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
+ vport->vport, mac);
+
+ if (vport->enabled_events & UC_ADDR_CHANGE) {
+ esw_update_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_UC);
+ esw_apply_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_UC);
+ }
+
+ if (vport->enabled_events & MC_ADDR_CHANGE) {
+ esw_update_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_MC);
+ }
+
+ if (vport->enabled_events & PROMISC_CHANGE) {
+ esw_update_vport_rx_mode(esw, vport->vport);
+ if (!IS_ERR_OR_NULL(vport->allmulti_rule))
+ esw_update_vport_mc_promisc(esw, vport->vport);
+ }
+
+ if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) {
+ esw_apply_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_MC);
+ }
+
+ esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport);
+ if (vport->enabled)
+ arm_vport_context_events_cmd(dev, vport->vport,
+ vport->enabled_events);
+}
+
+static void esw_vport_change_handler(struct work_struct *work)
+{
+ struct mlx5_vport *vport =
+ container_of(work, struct mlx5_vport, vport_change_handler);
+ struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+
+ mutex_lock(&esw->state_lock);
+ esw_vport_change_handle_locked(vport);
+ mutex_unlock(&esw->state_lock);
+}
+
+static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *vlan_grp = NULL;
+ struct mlx5_flow_group *drop_grp = NULL;
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *acl;
+ void *match_criteria;
+ u32 *flow_group_in;
+ /* The egress acl table contains 2 rules:
+ * 1)Allow traffic with vlan_tag=vst_vlan_id
+ * 2)Drop all other traffic.
+ */
+ int table_size = 2;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+ return -EOPNOTSUPP;
+
+ if (!IS_ERR_OR_NULL(vport->egress.acl))
+ return 0;
+
+ esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
+ vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->vport);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(vlan_grp)) {
+ err = PTR_ERR(vlan_grp);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ drop_grp = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(drop_grp)) {
+ err = PTR_ERR(drop_grp);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+
+ vport->egress.acl = acl;
+ vport->egress.drop_grp = drop_grp;
+ vport->egress.allowed_vlans_grp = vlan_grp;
+out:
+ kvfree(flow_group_in);
+ if (err && !IS_ERR_OR_NULL(vlan_grp))
+ mlx5_destroy_flow_group(vlan_grp);
+ if (err && !IS_ERR_OR_NULL(acl))
+ mlx5_destroy_flow_table(acl);
+ return err;
+}
+
+static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan))
+ mlx5_del_flow_rules(vport->egress.allowed_vlan);
+
+ if (!IS_ERR_OR_NULL(vport->egress.drop_rule))
+ mlx5_del_flow_rules(vport->egress.drop_rule);
+
+ vport->egress.allowed_vlan = NULL;
+ vport->egress.drop_rule = NULL;
+}
+
+static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ return;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
+
+ esw_vport_cleanup_egress_rules(esw, vport);
+ mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp);
+ mlx5_destroy_flow_group(vport->egress.drop_grp);
+ mlx5_destroy_flow_table(vport->egress.acl);
+ vport->egress.allowed_vlans_grp = NULL;
+ vport->egress.drop_grp = NULL;
+ vport->egress.acl = NULL;
+}
+
+static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ /* The ingress acl table contains 4 groups
+ * (2 active rules at the same time -
+ * 1 allow rule from one of the first 3 groups.
+ * 1 drop rule from the last group):
+ * 1)Allow untagged traffic with smac=original mac.
+ * 2)Allow untagged traffic.
+ * 3)Allow traffic with smac=original mac.
+ * 4)Drop all other traffic.
+ */
+ int table_size = 4;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
+ return -EOPNOTSUPP;
+
+ if (!IS_ERR_OR_NULL(vport->ingress.acl))
+ return 0;
+
+ esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
+ vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->vport);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.acl = acl;
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.allow_untagged_spoofchk_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.allow_untagged_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.allow_spoofchk_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.drop_grp = g;
+
+out:
+ if (err) {
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_spoofchk_only_grp))
+ mlx5_destroy_flow_group(
+ vport->ingress.allow_spoofchk_only_grp);
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_only_grp))
+ mlx5_destroy_flow_group(
+ vport->ingress.allow_untagged_only_grp);
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_spoofchk_grp))
+ mlx5_destroy_flow_group(
+ vport->ingress.allow_untagged_spoofchk_grp);
+ if (!IS_ERR_OR_NULL(vport->ingress.acl))
+ mlx5_destroy_flow_table(vport->ingress.acl);
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->ingress.drop_rule))
+ mlx5_del_flow_rules(vport->ingress.drop_rule);
+
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_rule))
+ mlx5_del_flow_rules(vport->ingress.allow_rule);
+
+ vport->ingress.drop_rule = NULL;
+ vport->ingress.allow_rule = NULL;
+}
+
+static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->ingress.acl))
+ return;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
+
+ esw_vport_cleanup_ingress_rules(esw, vport);
+ mlx5_destroy_flow_group(vport->ingress.allow_spoofchk_only_grp);
+ mlx5_destroy_flow_group(vport->ingress.allow_untagged_only_grp);
+ mlx5_destroy_flow_group(vport->ingress.allow_untagged_spoofchk_grp);
+ mlx5_destroy_flow_group(vport->ingress.drop_grp);
+ mlx5_destroy_flow_table(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ vport->ingress.drop_grp = NULL;
+ vport->ingress.allow_spoofchk_only_grp = NULL;
+ vport->ingress.allow_untagged_only_grp = NULL;
+ vport->ingress.allow_untagged_spoofchk_grp = NULL;
+}
+
+static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_fc *counter = vport->ingress.drop_counter;
+ struct mlx5_flow_destination drop_ctr_dst = {0};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_spec *spec;
+ int dest_num = 0;
+ int err = 0;
+ u8 *smac_v;
+
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+ esw_vport_disable_ingress_acl(esw, vport);
+ return 0;
+ }
+
+ err = esw_vport_enable_ingress_acl(esw, vport);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "failed to enable ingress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (vport->info.vlan || vport->info.qos)
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+
+ if (vport->info.spoofchk) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
+ smac_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, vport->info.mac);
+ }
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ vport->ingress.allow_rule =
+ mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ goto out;
+ }
+
+ memset(spec, 0, sizeof(*spec));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /* Attach drop flow counter */
+ if (counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter = counter;
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->ingress.drop_rule =
+ mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->ingress.drop_rule)) {
+ err = PTR_ERR(vport->ingress.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.drop_rule = NULL;
+ goto out;
+ }
+
+out:
+ if (err)
+ esw_vport_cleanup_ingress_rules(esw, vport);
+ kvfree(spec);
+ return err;
+}
+
+static int esw_vport_egress_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_fc *counter = vport->egress.drop_counter;
+ struct mlx5_flow_destination drop_ctr_dst = {0};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_spec *spec;
+ int dest_num = 0;
+ int err = 0;
+
+ esw_vport_cleanup_egress_rules(esw, vport);
+
+ if (!vport->info.vlan && !vport->info.qos) {
+ esw_vport_disable_egress_acl(esw, vport);
+ return 0;
+ }
+
+ err = esw_vport_enable_egress_acl(esw, vport);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "failed to enable egress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Allowed vlan rule */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ vport->egress.allowed_vlan =
+ mlx5_add_flow_rules(vport->egress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->egress.allowed_vlan)) {
+ err = PTR_ERR(vport->egress.allowed_vlan);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.allowed_vlan = NULL;
+ goto out;
+ }
+
+ /* Drop others rule (star rule) */
+ memset(spec, 0, sizeof(*spec));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /* Attach egress drop flow counter */
+ if (counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter = counter;
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->egress.drop_rule =
+ mlx5_add_flow_rules(vport->egress.acl, spec,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->egress.drop_rule)) {
+ err = PTR_ERR(vport->egress.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.drop_rule = NULL;
+ }
+out:
+ kvfree(spec);
+ return err;
+}
+
+/* Vport QoS management */
+static int esw_create_tsar(struct mlx5_eswitch *esw)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ struct mlx5_core_dev *dev = esw->dev;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return 0;
+
+ if (esw->qos.enabled)
+ return -EEXIST;
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &esw->qos.root_tsar_id);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
+ return err;
+ }
+
+ esw->qos.enabled = true;
+ return 0;
+}
+
+static void esw_destroy_tsar(struct mlx5_eswitch *esw)
+{
+ int err;
+
+ if (!esw->qos.enabled)
+ return;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_id);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);
+
+ esw->qos.enabled = false;
+}
+
+static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
+ u32 initial_max_rate, u32 initial_bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_core_dev *dev = esw->dev;
+ void *vport_elem;
+ int err = 0;
+
+ if (!esw->qos.enabled || !MLX5_CAP_GEN(dev, qos) ||
+ !MLX5_CAP_QOS(dev, esw_scheduling))
+ return 0;
+
+ if (vport->qos.enabled)
+ return -EEXIST;
+
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+ element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
+ esw->qos.root_tsar_id);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
+ initial_max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ vport->qos.enabled = true;
+ return 0;
+}
+
+static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ int err = 0;
+
+ if (!vport->qos.enabled)
+ return;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+ vport_num, err);
+
+ vport->qos.enabled = false;
+}
+
+static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_core_dev *dev = esw->dev;
+ void *vport_elem;
+ u32 bitmask = 0;
+ int err = 0;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ if (!vport->qos.enabled)
+ return -EIO;
+
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+ element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
+ esw->qos.root_tsar_id);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
+ max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+
+ err = mlx5_modify_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ vport->qos.esw_tsar_ix,
+ bitmask);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+{
+ ((u8 *)node_guid)[7] = mac[0];
+ ((u8 *)node_guid)[6] = mac[1];
+ ((u8 *)node_guid)[5] = mac[2];
+ ((u8 *)node_guid)[4] = 0xff;
+ ((u8 *)node_guid)[3] = 0xfe;
+ ((u8 *)node_guid)[2] = mac[3];
+ ((u8 *)node_guid)[1] = mac[4];
+ ((u8 *)node_guid)[0] = mac[5];
+}
+
+static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int vport_num = vport->vport;
+
+ if (!vport_num)
+ return;
+
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport_num,
+ vport->info.link_state);
+ mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
+ mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
+ (vport->info.vlan || vport->info.qos));
+
+ /* Only legacy mode needs ACLs */
+ if (esw->mode == SRIOV_LEGACY) {
+ esw_vport_ingress_config(esw, vport);
+ esw_vport_egress_config(esw, vport);
+ }
+}
+
+static void esw_vport_create_drop_counters(struct mlx5_vport *vport)
+{
+ struct mlx5_core_dev *dev = vport->dev;
+
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, flow_counter)) {
+ vport->ingress.drop_counter = mlx5_fc_create(dev, false);
+ if (IS_ERR(vport->ingress.drop_counter)) {
+ esw_warn(dev,
+ "vport[%d] configure ingress drop rule counter failed\n",
+ vport->vport);
+ vport->ingress.drop_counter = NULL;
+ }
+ }
+
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, flow_counter)) {
+ vport->egress.drop_counter = mlx5_fc_create(dev, false);
+ if (IS_ERR(vport->egress.drop_counter)) {
+ esw_warn(dev,
+ "vport[%d] configure egress drop rule counter failed\n",
+ vport->vport);
+ vport->egress.drop_counter = NULL;
+ }
+ }
+}
+
+static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport)
+{
+ struct mlx5_core_dev *dev = vport->dev;
+
+ if (vport->ingress.drop_counter)
+ mlx5_fc_destroy(dev, vport->ingress.drop_counter);
+ if (vport->egress.drop_counter)
+ mlx5_fc_destroy(dev, vport->egress.drop_counter);
+}
+
+static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
+ int enable_events)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ mutex_lock(&esw->state_lock);
+ WARN_ON(vport->enabled);
+
+ esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
+
+ /* Create steering drop counters for ingress and egress ACLs */
+ if (vport_num && esw->mode == SRIOV_LEGACY)
+ esw_vport_create_drop_counters(vport);
+
+ /* Restore old vport configuration */
+ esw_apply_vport_conf(esw, vport);
+
+ /* Attach vport to the eswitch rate limiter */
+ if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate,
+ vport->qos.bw_share))
+ esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num);
+
+ /* Sync with current vport context */
+ vport->enabled_events = enable_events;
+ vport->enabled = true;
+
+ /* only PF is trusted by default */
+ if (!vport_num)
+ vport->info.trusted = true;
+
+ esw_vport_change_handle_locked(vport);
+
+ esw->enabled_vports++;
+ esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
+ mutex_unlock(&esw->state_lock);
+}
+
+static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ if (!vport->enabled)
+ return;
+
+ esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num);
+ /* Mark this vport as disabled to discard new events */
+ vport->enabled = false;
+
+ synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
+ /* Wait for current already scheduled events to complete */
+ flush_workqueue(esw->work_queue);
+ /* Disable events from this vport */
+ arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+ mutex_lock(&esw->state_lock);
+ /* We don't assume VFs will cleanup after themselves.
+ * Calling vport change handler while vport is disabled will cleanup
+ * the vport resources.
+ */
+ esw_vport_change_handle_locked(vport);
+ vport->enabled_events = 0;
+ esw_vport_disable_qos(esw, vport_num);
+ if (vport_num && esw->mode == SRIOV_LEGACY) {
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport_num,
+ MLX5_VPORT_ADMIN_STATE_DOWN);
+ esw_vport_disable_egress_acl(esw, vport);
+ esw_vport_disable_ingress_acl(esw, vport);
+ esw_vport_destroy_drop_counters(vport);
+ }
+ esw->enabled_vports--;
+ mutex_unlock(&esw->state_lock);
+}
+
+/* Public E-Switch API */
+#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
+
+
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
+{
+ int err;
+ int i, enabled_events;
+
+ if (!ESW_ALLOWED(esw) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+ esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
+ esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n");
+
+ if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
+ esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
+
+ esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
+ esw->mode = mode;
+
+ if (mode == SRIOV_LEGACY) {
+ err = esw_create_legacy_fdb_table(esw);
+ } else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
+ err = esw_offloads_init(esw, nvfs + 1);
+ }
+
+ if (err)
+ goto abort;
+
+ err = esw_create_tsar(esw);
+ if (err)
+ esw_warn(esw->dev, "Failed to create eswitch TSAR");
+
+ /* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
+ * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
+ * 2. FDB/Eswitch is programmed by user space tools
+ */
+ enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
+ for (i = 0; i <= nvfs; i++)
+ esw_enable_vport(esw, i, enabled_events);
+
+ esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
+ esw->enabled_vports);
+ return 0;
+
+abort:
+ esw->mode = SRIOV_NONE;
+
+ if (mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
+ return err;
+}
+
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+{
+ struct esw_mc_addr *mc_promisc;
+ int old_mode;
+ int nvports;
+ int i;
+
+ if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
+ return;
+
+ esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
+ esw->enabled_vports, esw->mode);
+
+ mc_promisc = &esw->mc_promisc;
+ nvports = esw->enabled_vports;
+
+ for (i = 0; i < esw->total_vports; i++)
+ esw_disable_vport(esw, i);
+
+ if (mc_promisc && mc_promisc->uplink_rule)
+ mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+ esw_destroy_tsar(esw);
+
+ if (esw->mode == SRIOV_LEGACY)
+ esw_destroy_legacy_fdb_table(esw);
+ else if (esw->mode == SRIOV_OFFLOADS)
+ esw_offloads_cleanup(esw, nvports);
+
+ old_mode = esw->mode;
+ esw->mode = SRIOV_NONE;
+
+ if (old_mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+int mlx5_eswitch_init(struct mlx5_core_dev *dev)
+{
+ int total_vports = MLX5_TOTAL_VPORTS(dev);
+ struct mlx5_eswitch *esw;
+ int vport_num;
+ int err;
+
+ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ esw_info(dev,
+ "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+ total_vports,
+ MLX5_MAX_UC_PER_VPORT(dev),
+ MLX5_MAX_MC_PER_VPORT(dev));
+
+ esw = kzalloc(sizeof(*esw), GFP_KERNEL);
+ if (!esw)
+ return -ENOMEM;
+
+ esw->dev = dev;
+
+ esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
+ if (!esw->work_queue) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
+ GFP_KERNEL);
+ if (!esw->vports) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ err = esw_offloads_init_reps(esw);
+ if (err)
+ goto abort;
+
+ hash_init(esw->offloads.encap_tbl);
+ hash_init(esw->offloads.mod_hdr_tbl);
+ mutex_init(&esw->state_lock);
+
+ for (vport_num = 0; vport_num < total_vports; vport_num++) {
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ vport->vport = vport_num;
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ vport->dev = dev;
+ INIT_WORK(&vport->vport_change_handler,
+ esw_vport_change_handler);
+ }
+
+ esw->total_vports = total_vports;
+ esw->enabled_vports = 0;
+ esw->mode = SRIOV_NONE;
+ esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+ else
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+
+ dev->priv.eswitch = esw;
+ return 0;
+abort:
+ if (esw->work_queue)
+ destroy_workqueue(esw->work_queue);
+ esw_offloads_cleanup_reps(esw);
+ kfree(esw->vports);
+ kfree(esw);
+ return err;
+}
+
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+{
+ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
+ return;
+
+ esw_info(esw->dev, "cleanup\n");
+
+ esw->dev->priv.eswitch = NULL;
+ destroy_workqueue(esw->work_queue);
+ esw_offloads_cleanup_reps(esw);
+ kfree(esw->vports);
+ kfree(esw);
+}
+
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
+{
+ struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
+ u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
+ struct mlx5_vport *vport;
+
+ if (!esw) {
+ pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
+ vport_num);
+ return;
+ }
+
+ vport = &esw->vports[vport_num];
+ if (vport->enabled)
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+}
+
+/* Vport Administration */
+#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
+
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ int vport, u8 mac[ETH_ALEN])
+{
+ struct mlx5_vport *evport;
+ u64 node_guid;
+ int err = 0;
+
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ if (evport->info.spoofchk && !is_valid_ether_addr(mac))
+ mlx5_core_warn(esw->dev,
+ "Set invalid MAC while spoofchk is on, vport(%d)\n",
+ vport);
+
+ err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
+ vport, err);
+ goto unlock;
+ }
+
+ node_guid_gen_from_mac(&node_guid, mac);
+ err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid);
+ if (err)
+ mlx5_core_warn(esw->dev,
+ "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
+ vport, err);
+
+ ether_addr_copy(evport->info.mac, mac);
+ evport->info.node_guid = node_guid;
+ if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ err = esw_vport_ingress_config(esw, evport);
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ int vport, int link_state)
+{
+ struct mlx5_vport *evport;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ err = mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport, link_state);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "Failed to set vport %d link state, err = %d",
+ vport, err);
+ goto unlock;
+ }
+
+ evport->info.link_state = link_state;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ int vport, struct ifla_vf_info *ivi)
+{
+ struct mlx5_vport *evport;
+
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ evport = &esw->vports[vport];
+
+ memset(ivi, 0, sizeof(*ivi));
+ ivi->vf = vport - 1;
+
+ mutex_lock(&esw->state_lock);
+ ether_addr_copy(ivi->mac, evport->info.mac);
+ ivi->linkstate = evport->info.link_state;
+ ivi->vlan = evport->info.vlan;
+ ivi->qos = evport->info.qos;
+ ivi->spoofchk = evport->info.spoofchk;
+ ivi->trusted = evport->info.trusted;
+ ivi->min_tx_rate = evport->info.min_rate;
+ ivi->max_tx_rate = evport->info.max_rate;
+ mutex_unlock(&esw->state_lock);
+
+ return 0;
+}
+
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos, u8 set_flags)
+{
+ struct mlx5_vport *evport;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+ if (err)
+ goto unlock;
+
+ evport->info.vlan = vlan;
+ evport->info.qos = qos;
+ if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+ err = esw_vport_ingress_config(esw, evport);
+ if (err)
+ goto unlock;
+ err = esw_vport_egress_config(esw, evport);
+ }
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos)
+{
+ u8 set_flags = 0;
+
+ if (vlan || qos)
+ set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+ return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+}
+
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+ int vport, bool spoofchk)
+{
+ struct mlx5_vport *evport;
+ bool pschk;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+ pschk = evport->info.spoofchk;
+ evport->info.spoofchk = spoofchk;
+ if (pschk && !is_valid_ether_addr(evport->info.mac))
+ mlx5_core_warn(esw->dev,
+ "Spoofchk in set while MAC is invalid, vport(%d)\n",
+ evport->vport);
+ if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ err = esw_vport_ingress_config(esw, evport);
+ if (err)
+ evport->info.spoofchk = pschk;
+ mutex_unlock(&esw->state_lock);
+
+ return err;
+}
+
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+ int vport, bool setting)
+{
+ struct mlx5_vport *evport;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+ evport->info.trusted = setting;
+ if (evport->enabled)
+ esw_vport_change_handle_locked(evport);
+ mutex_unlock(&esw->state_lock);
+
+ return 0;
+}
+
+static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_vport *evport;
+ u32 max_guarantee = 0;
+ int i;
+
+ for (i = 0; i < esw->total_vports; i++) {
+ evport = &esw->vports[i];
+ if (!evport->enabled || evport->info.min_rate < max_guarantee)
+ continue;
+ max_guarantee = evport->info.min_rate;
+ }
+
+ if (max_guarantee)
+ return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+ return 0;
+}
+
+static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ u32 divider = calculate_vports_min_rate_divider(esw);
+ struct mlx5_vport *evport;
+ u32 vport_max_rate;
+ u32 vport_min_rate;
+ u32 bw_share;
+ int err;
+ int i;
+
+ for (i = 0; i < esw->total_vports; i++) {
+ evport = &esw->vports[i];
+ if (!evport->enabled)
+ continue;
+ vport_min_rate = evport->info.min_rate;
+ vport_max_rate = evport->info.max_rate;
+ bw_share = 0;
+
+ if (divider)
+ bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
+ divider,
+ fw_max_bw_share);
+
+ if (bw_share == evport->qos.bw_share)
+ continue;
+
+ err = esw_vport_qos_config(esw, i, vport_max_rate,
+ bw_share);
+ if (!err)
+ evport->qos.bw_share = bw_share;
+ else
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+ u32 max_rate, u32 min_rate)
+{
+ struct mlx5_vport *evport;
+ u32 fw_max_bw_share;
+ u32 previous_min_rate;
+ bool min_rate_supported;
+ bool max_rate_supported;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+ fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+ max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
+ if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ if (min_rate == evport->info.min_rate)
+ goto set_max_rate;
+
+ previous_min_rate = evport->info.min_rate;
+ evport->info.min_rate = min_rate;
+ err = normalize_vports_min_rate(esw);
+ if (err) {
+ evport->info.min_rate = previous_min_rate;
+ goto unlock;
+ }
+
+set_max_rate:
+ if (max_rate == evport->info.max_rate)
+ goto unlock;
+
+ err = esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share);
+ if (!err)
+ evport->info.max_rate = max_rate;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
+ int vport_idx,
+ struct mlx5_vport_drop_stats *stats)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_vport *vport = &esw->vports[vport_idx];
+ u64 rx_discard_vport_down, tx_discard_vport_down;
+ u64 bytes = 0;
+ int err = 0;
+
+ if (!vport->enabled || esw->mode != SRIOV_LEGACY)
+ return 0;
+
+ if (vport->egress.drop_counter)
+ mlx5_fc_query(dev, vport->egress.drop_counter,
+ &stats->rx_dropped, &bytes);
+
+ if (vport->ingress.drop_counter)
+ mlx5_fc_query(dev, vport->ingress.drop_counter,
+ &stats->tx_dropped, &bytes);
+
+ if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
+ !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ return 0;
+
+ err = mlx5_query_vport_down_stats(dev, vport_idx,
+ &rx_discard_vport_down,
+ &tx_discard_vport_down);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
+ stats->rx_dropped += rx_discard_vport_down;
+ if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ stats->tx_dropped += tx_discard_vport_down;
+
+ return 0;
+}
+
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ int vport,
+ struct ifla_vf_stats *vf_stats)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
+ struct mlx5_vport_drop_stats stats = {0};
+ int err = 0;
+ u32 *out;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+
+ memset(out, 0, outlen);
+ err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
+ if (err)
+ goto free_out;
+
+ #define MLX5_GET_CTR(p, x) \
+ MLX5_GET64(query_vport_counter_out, p, x)
+
+ memset(vf_stats, 0, sizeof(*vf_stats));
+ vf_stats->rx_packets =
+ MLX5_GET_CTR(out, received_eth_unicast.packets) +
+ MLX5_GET_CTR(out, received_ib_unicast.packets) +
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ vf_stats->rx_bytes =
+ MLX5_GET_CTR(out, received_eth_unicast.octets) +
+ MLX5_GET_CTR(out, received_ib_unicast.octets) +
+ MLX5_GET_CTR(out, received_eth_multicast.octets) +
+ MLX5_GET_CTR(out, received_ib_multicast.octets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.octets);
+
+ vf_stats->tx_packets =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+
+ vf_stats->tx_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+ vf_stats->multicast =
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets);
+
+ vf_stats->broadcast =
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+ if (err)
+ goto free_out;
+ vf_stats->rx_dropped = stats.rx_dropped;
+ vf_stats->tx_dropped = stats.tx_dropped;
+
+free_out:
+ kvfree(out);
+ return err;
+}
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
new file mode 100644
index 000000000..c17bfcab5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_ESWITCH_H__
+#define __MLX5_ESWITCH_H__
+
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <net/devlink.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/fs.h>
+#include "lib/mpfs.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+#define MLX5_MAX_UC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
+
+#define MLX5_MAX_MC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
+
+#define FDB_UPLINK_VPORT 0xffff
+
+#define MLX5_MIN_BW_SHARE 1
+
+#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
+ min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)
+
+#define mlx5_esw_has_fwd_fdb(dev) \
+ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
+
+struct vport_ingress {
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *allow_untagged_spoofchk_grp;
+ struct mlx5_flow_group *allow_spoofchk_only_grp;
+ struct mlx5_flow_group *allow_untagged_only_grp;
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *allow_rule;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+};
+
+struct vport_egress {
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *allowed_vlans_grp;
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *allowed_vlan;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+};
+
+struct mlx5_vport_drop_stats {
+ u64 rx_dropped;
+ u64 tx_dropped;
+};
+
+struct mlx5_vport_info {
+ u8 mac[ETH_ALEN];
+ u16 vlan;
+ u8 qos;
+ u64 node_guid;
+ int link_state;
+ u32 min_rate;
+ u32 max_rate;
+ bool spoofchk;
+ bool trusted;
+};
+
+struct mlx5_vport {
+ struct mlx5_core_dev *dev;
+ int vport;
+ struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct mlx5_flow_handle *promisc_rule;
+ struct mlx5_flow_handle *allmulti_rule;
+ struct work_struct vport_change_handler;
+
+ struct vport_ingress ingress;
+ struct vport_egress egress;
+
+ struct mlx5_vport_info info;
+
+ struct {
+ bool enabled;
+ u32 esw_tsar_ix;
+ u32 bw_share;
+ } qos;
+
+ bool enabled;
+ u16 enabled_events;
+};
+
+struct mlx5_eswitch_fdb {
+ union {
+ struct legacy_fdb {
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *addr_grp;
+ struct mlx5_flow_group *allmulti_grp;
+ struct mlx5_flow_group *promisc_grp;
+ } legacy;
+
+ struct offloads_fdb {
+ struct mlx5_flow_table *fast_fdb;
+ struct mlx5_flow_table *fwd_fdb;
+ struct mlx5_flow_table *slow_fdb;
+ struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
+ int vlan_push_pop_refcount;
+ } offloads;
+ };
+};
+
+struct mlx5_esw_offload {
+ struct mlx5_flow_table *ft_offloads;
+ struct mlx5_flow_group *vport_rx_group;
+ struct mlx5_eswitch_rep *vport_reps;
+ DECLARE_HASHTABLE(encap_tbl, 8);
+ DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+ u8 inline_mode;
+ u64 num_flows;
+ u8 encap;
+};
+
+/* E-Switch MC FDB table hash node */
+struct esw_mc_addr { /* SRIOV only */
+ struct l2addr_node node;
+ struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */
+ u32 refcnt;
+};
+
+struct mlx5_eswitch {
+ struct mlx5_core_dev *dev;
+ struct mlx5_eswitch_fdb fdb_table;
+ struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
+ struct workqueue_struct *work_queue;
+ struct mlx5_vport *vports;
+ int total_vports;
+ int enabled_vports;
+ /* Synchronize between vport change events
+ * and async SRIOV admin state changes
+ */
+ struct mutex state_lock;
+ struct esw_mc_addr mc_promisc;
+
+ struct {
+ bool enabled;
+ u32 root_tsar_id;
+ } qos;
+
+ struct mlx5_esw_offload offloads;
+ int mode;
+};
+
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
+int esw_offloads_init_reps(struct mlx5_eswitch *esw);
+
+/* E-Switch API */
+int mlx5_eswitch_init(struct mlx5_core_dev *dev);
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ int vport, u8 mac[ETH_ALEN]);
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ int vport, int link_state);
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos);
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+ int vport, bool spoofchk);
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+ int vport_num, bool setting);
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+ u32 max_rate, u32 min_rate);
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ int vport, struct ifla_vf_info *ivi);
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ int vport,
+ struct ifla_vf_stats *vf_stats);
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
+
+struct mlx5_flow_spec;
+struct mlx5_esw_flow_attr;
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
+
+enum {
+ SET_VLAN_STRIP = BIT(0),
+ SET_VLAN_INSERT = BIT(1)
+};
+
+enum mlx5_flow_match_level {
+ MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE,
+ MLX5_MATCH_L2 = MLX5_INLINE_MODE_L2,
+ MLX5_MATCH_L3 = MLX5_INLINE_MODE_IP,
+ MLX5_MATCH_L4 = MLX5_INLINE_MODE_TCP_UDP,
+};
+
+/* current maximum for flow based vport multicasting */
+#define MLX5_MAX_FLOW_FWD_VPORTS 2
+
+struct mlx5_esw_flow_attr {
+ struct mlx5_eswitch_rep *in_rep;
+ struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5_core_dev *in_mdev;
+
+ int mirror_count;
+ int out_count;
+
+ int action;
+ __be16 vlan_proto[MLX5_FS_VLAN_DEPTH];
+ u16 vlan_vid[MLX5_FS_VLAN_DEPTH];
+ u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
+ u8 total_vlan;
+ bool vlan_handled;
+ u32 encap_id;
+ u32 mod_hdr_id;
+ u8 match_level;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+};
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr);
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr);
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos, u8 set_flags);
+
+static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
+ u8 vlan_depth)
+{
+ bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
+
+ if (vlan_depth == 1)
+ return ret;
+
+ return ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
+}
+
+#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
+
+#define esw_info(dev, format, ...) \
+ pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_warn(dev, format, ...) \
+ pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_debug(dev, format, ...) \
+ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+#else /* CONFIG_MLX5_ESWITCH */
+/* eswitch API stubs */
+static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
+static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
+static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
+static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
new file mode 100644
index 000000000..3028e8d90
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -0,0 +1,1368 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+enum {
+ FDB_FAST_PATH = 0,
+ FDB_SLOW_PATH
+};
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_table *ft = NULL;
+ struct mlx5_fc *counter = NULL;
+ struct mlx5_flow_handle *rule;
+ int j, i = 0;
+ void *misc;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->mirror_count)
+ ft = esw->fdb_table.offloads.fwd_fdb;
+ else
+ ft = esw->fdb_table.offloads.fast_fdb;
+
+ flow_act.action = attr->action;
+ /* if per flow vlan pop/push is emulated, don't set that into the firmware */
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]);
+ flow_act.vlan[0].vid = attr->vlan_vid[0];
+ flow_act.vlan[0].prio = attr->vlan_prio[0];
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]);
+ flow_act.vlan[1].vid = attr->vlan_vid[1];
+ flow_act.vlan[1].prio = attr->vlan_prio[1];
+ }
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ for (j = attr->mirror_count; j < attr->out_count; j++) {
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = attr->out_rep[j]->vport;
+ dest[i].vport.vhca_id =
+ MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
+ dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ i++;
+ }
+ }
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ counter = mlx5_fc_create(esw->dev, true);
+ if (IS_ERR(counter)) {
+ rule = ERR_CAST(counter);
+ goto err_counter_alloc;
+ }
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[i].counter = counter;
+ i++;
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ if (attr->match_level == MLX5_MATCH_NONE)
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ else
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ flow_act.modify_id = attr->mod_hdr_id;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+ flow_act.encap_id = attr->encap_id;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i);
+ if (IS_ERR(rule))
+ goto err_add_rule;
+ else
+ esw->offloads.num_flows++;
+
+ return rule;
+
+err_add_rule:
+ mlx5_fc_destroy(esw->dev, counter);
+err_counter_alloc:
+ return rule;
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle *rule;
+ void *misc;
+ int i;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ for (i = 0; i < attr->mirror_count; i++) {
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = attr->out_rep[i]->vport;
+ dest[i].vport.vhca_id =
+ MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
+ dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ }
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = esw->fdb_table.offloads.fwd_fdb,
+ i++;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ if (attr->match_level == MLX5_MATCH_NONE)
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ else
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS;
+
+ rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i);
+
+ if (!IS_ERR(rule))
+ esw->offloads.num_flows++;
+
+ return rule;
+}
+
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_fc *counter = NULL;
+
+ counter = mlx5_flow_rule_counter(rule);
+ mlx5_del_flow_rules(rule);
+ mlx5_fc_destroy(esw->dev, counter);
+ esw->offloads.num_flows--;
+}
+
+static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
+{
+ struct mlx5_eswitch_rep *rep;
+ int vf_vport, err = 0;
+
+ esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
+ for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
+ rep = &esw->offloads.vport_reps[vf_vport];
+ if (!rep->rep_if[REP_ETH].valid)
+ continue;
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+static struct mlx5_eswitch_rep *
+esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
+{
+ struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
+
+ in_rep = attr->in_rep;
+ out_rep = attr->out_rep[0];
+
+ if (push)
+ vport = in_rep;
+ else if (pop)
+ vport = out_rep;
+ else
+ vport = in_rep;
+
+ return vport;
+}
+
+static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
+ bool push, bool pop, bool fwd)
+{
+ struct mlx5_eswitch_rep *in_rep, *out_rep;
+
+ if ((push || pop) && !fwd)
+ goto out_notsupp;
+
+ in_rep = attr->in_rep;
+ out_rep = attr->out_rep[0];
+
+ if (push && in_rep->vport == FDB_UPLINK_VPORT)
+ goto out_notsupp;
+
+ if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+ goto out_notsupp;
+
+ /* vport has vlan push configured, can't offload VF --> wire rules w.o it */
+ if (!push && !pop && fwd)
+ if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+ goto out_notsupp;
+
+ /* protects against (1) setting rules with different vlans to push and
+ * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0)
+ */
+ if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0]))
+ goto out_notsupp;
+
+ return 0;
+
+out_notsupp:
+ return -EOPNOTSUPP;
+}
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_eswitch_rep *vport = NULL;
+ bool push, pop, fwd;
+ int err = 0;
+
+ /* nop if we're on the vlan push/pop non emulation mode */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ return 0;
+
+ push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+ pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+ err = esw_add_vlan_action_check(attr, push, pop, fwd);
+ if (err)
+ return err;
+
+ attr->vlan_handled = false;
+
+ vport = esw_vlan_action_get_vport(attr, push, pop);
+
+ if (!push && !pop && fwd) {
+ /* tracks VF --> wire rules without vlan push action */
+ if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) {
+ vport->vlan_refcount++;
+ attr->vlan_handled = true;
+ }
+
+ return 0;
+ }
+
+ if (!push && !pop)
+ return 0;
+
+ if (!(offloads->vlan_push_pop_refcount)) {
+ /* it's the 1st vlan rule, apply global vlan pop policy */
+ err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ }
+ offloads->vlan_push_pop_refcount++;
+
+ if (push) {
+ if (vport->vlan_refcount)
+ goto skip_set_push;
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0,
+ SET_VLAN_INSERT | SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ vport->vlan = attr->vlan_vid[0];
+skip_set_push:
+ vport->vlan_refcount++;
+ }
+out:
+ if (!err)
+ attr->vlan_handled = true;
+ return err;
+}
+
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_eswitch_rep *vport = NULL;
+ bool push, pop, fwd;
+ int err = 0;
+
+ /* nop if we're on the vlan push/pop non emulation mode */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ return 0;
+
+ if (!attr->vlan_handled)
+ return 0;
+
+ push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+ pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+ vport = esw_vlan_action_get_vport(attr, push, pop);
+
+ if (!push && !pop && fwd) {
+ /* tracks VF --> wire rules without vlan push action */
+ if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT)
+ vport->vlan_refcount--;
+
+ return 0;
+ }
+
+ if (push) {
+ vport->vlan_refcount--;
+ if (vport->vlan_refcount)
+ goto skip_unset_push;
+
+ vport->vlan = 0;
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
+ 0, 0, SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ }
+
+skip_unset_push:
+ offloads->vlan_push_pop_refcount--;
+ if (offloads->vlan_push_pop_refcount)
+ return 0;
+
+ /* no more vlan rules, stop global vlan pop policy */
+ err = esw_set_global_vlan_pop(esw, 0);
+
+out:
+ return err;
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ flow_rule = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
+ MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = vport;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
+out:
+ kvfree(spec);
+ return flow_rule;
+}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
+
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
+ int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = 0;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
+out:
+ kvfree(spec);
+ return err;
+}
+
+#define ESW_OFFLOADS_NUM_GROUPS 4
+
+static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb = NULL;
+ int esw_size, err = 0;
+ u32 flags = 0;
+ u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ err = -EOPNOTSUPP;
+ goto out_namespace;
+ }
+
+ esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
+ max_flow_counter, ESW_OFFLOADS_NUM_GROUPS);
+
+ esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS,
+ 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ if (mlx5_esw_has_fwd_fdb(dev))
+ esw_size >>= 1;
+
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
+
+ fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
+ esw_size,
+ ESW_OFFLOADS_NUM_GROUPS, 0,
+ flags);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
+ goto out_namespace;
+ }
+ esw->fdb_table.offloads.fast_fdb = fdb;
+
+ if (!mlx5_esw_has_fwd_fdb(dev))
+ goto out_namespace;
+
+ fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
+ esw_size,
+ ESW_OFFLOADS_NUM_GROUPS, 1,
+ flags);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create fwd table err %d\n", err);
+ goto out_ft;
+ }
+ esw->fdb_table.offloads.fwd_fdb = fdb;
+
+ return err;
+
+out_ft:
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
+out_namespace:
+ return err;
+}
+
+static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+{
+ if (mlx5_esw_has_fwd_fdb(esw->dev))
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb);
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
+}
+
+#define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
+
+static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb = NULL;
+ int table_size, ix, err = 0;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ u8 *dmac;
+
+ esw_debug(esw->dev, "Create offloads FDB Tables\n");
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ err = -EOPNOTSUPP;
+ goto ns_err;
+ }
+
+ err = esw_create_offloads_fast_fdb_table(esw);
+ if (err)
+ goto fast_fdb_err;
+
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+
+ ft_attr.max_fte = table_size;
+ ft_attr.prio = FDB_SLOW_PATH;
+
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
+ goto slow_fdb_err;
+ }
+ esw->fdb_table.offloads.slow_fdb = fdb;
+
+ /* create send-to-vport group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+
+ ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ goto send_vport_err;
+ }
+ esw->fdb_table.offloads.send_to_vport_grp = g;
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
+ goto miss_err;
+ }
+ esw->fdb_table.offloads.miss_grp = g;
+
+ err = esw_add_fdb_miss_rule(esw);
+ if (err)
+ goto miss_rule_err;
+
+ kvfree(flow_group_in);
+ return 0;
+
+miss_rule_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+miss_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+send_vport_err:
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
+slow_fdb_err:
+ esw_destroy_offloads_fast_fdb_table(esw);
+fast_fdb_err:
+ns_err:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
+{
+ if (!esw->fdb_table.offloads.fast_fdb)
+ return;
+
+ esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
+ esw_destroy_offloads_fast_fdb_table(esw);
+}
+
+static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_table *ft_offloads;
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+ if (!ns) {
+ esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.max_fte = dev->priv.sriov.num_vfs + 2;
+
+ ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft_offloads)) {
+ err = PTR_ERR(ft_offloads);
+ esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
+ return err;
+ }
+
+ esw->offloads.ft_offloads = ft_offloads;
+ return 0;
+}
+
+static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+
+ mlx5_destroy_flow_table(offloads->ft_offloads);
+}
+
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ struct mlx5_priv *priv = &esw->dev->priv;
+ u32 *flow_group_in;
+ void *match_criteria, *misc;
+ int err = 0;
+ int nvports = priv->sriov.num_vfs + 2;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ /* create vport rx group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
+
+ g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
+ goto out;
+ }
+
+ esw->offloads.vport_rx_group = g;
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
+{
+ mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ flow_rule = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tirn;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
+ goto out;
+ }
+
+out:
+ kvfree(spec);
+ return flow_rule;
+}
+
+static int esw_offloads_start(struct mlx5_eswitch *esw)
+{
+ int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+ if (esw->mode != SRIOV_LEGACY) {
+ esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
+ return -EINVAL;
+ }
+
+ mlx5_eswitch_disable_sriov(esw);
+ err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ if (err) {
+ esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
+ err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ if (err1)
+ esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1);
+ }
+ if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
+ if (mlx5_eswitch_inline_mode_get(esw,
+ num_vfs,
+ &esw->offloads.inline_mode)) {
+ esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
+ esw_warn(esw->dev, "Inline mode is different between vports\n");
+ }
+ }
+ return err;
+}
+
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+{
+ kfree(esw->offloads.vport_reps);
+}
+
+int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+{
+ int total_vfs = MLX5_TOTAL_VPORTS(esw->dev);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_esw_offload *offloads;
+ struct mlx5_eswitch_rep *rep;
+ u8 hw_id[ETH_ALEN];
+ int vport;
+
+ esw->offloads.vport_reps = kcalloc(total_vfs,
+ sizeof(struct mlx5_eswitch_rep),
+ GFP_KERNEL);
+ if (!esw->offloads.vport_reps)
+ return -ENOMEM;
+
+ offloads = &esw->offloads;
+ mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+
+ for (vport = 0; vport < total_vfs; vport++) {
+ rep = &offloads->vport_reps[vport];
+
+ rep->vport = vport;
+ ether_addr_copy(rep->hw_id, hw_id);
+ }
+
+ offloads->vport_reps[0].vport = FDB_UPLINK_VPORT;
+
+ return 0;
+}
+
+static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ int vport;
+
+ for (vport = nvports - 1; vport >= 0; vport--) {
+ rep = &esw->offloads.vport_reps[vport];
+ if (!rep->rep_if[rep_type].valid)
+ continue;
+
+ rep->rep_if[rep_type].unload(rep);
+ }
+}
+
+static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
+{
+ u8 rep_type = NUM_REP_TYPES;
+
+ while (rep_type-- > 0)
+ esw_offloads_unload_reps_type(esw, nvports, rep_type);
+}
+
+static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ int vport;
+ int err;
+
+ for (vport = 0; vport < nvports; vport++) {
+ rep = &esw->offloads.vport_reps[vport];
+ if (!rep->rep_if[rep_type].valid)
+ continue;
+
+ err = rep->rep_if[rep_type].load(esw->dev, rep);
+ if (err)
+ goto err_reps;
+ }
+
+ return 0;
+
+err_reps:
+ esw_offloads_unload_reps_type(esw, vport, rep_type);
+ return err;
+}
+
+static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
+{
+ u8 rep_type = 0;
+ int err;
+
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ err = esw_offloads_load_reps_type(esw, nvports, rep_type);
+ if (err)
+ goto err_reps;
+ }
+
+ return err;
+
+err_reps:
+ while (rep_type-- > 0)
+ esw_offloads_unload_reps_type(esw, nvports, rep_type);
+ return err;
+}
+
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+{
+ int err;
+
+ err = esw_create_offloads_fdb_tables(esw, nvports);
+ if (err)
+ return err;
+
+ err = esw_create_offloads_table(esw);
+ if (err)
+ goto create_ft_err;
+
+ err = esw_create_vport_rx_group(esw);
+ if (err)
+ goto create_fg_err;
+
+ err = esw_offloads_load_reps(esw, nvports);
+ if (err)
+ goto err_reps;
+
+ return 0;
+
+err_reps:
+ esw_destroy_vport_rx_group(esw);
+
+create_fg_err:
+ esw_destroy_offloads_table(esw);
+
+create_ft_err:
+ esw_destroy_offloads_fdb_tables(esw);
+
+ return err;
+}
+
+static int esw_offloads_stop(struct mlx5_eswitch *esw)
+{
+ int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+ mlx5_eswitch_disable_sriov(esw);
+ err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ if (err) {
+ esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err);
+ err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ if (err1)
+ esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
+ }
+
+ /* enable back PF RoCE */
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
+ return err;
+}
+
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
+{
+ esw_offloads_unload_reps(esw, nvports);
+ esw_destroy_vport_rx_group(esw);
+ esw_destroy_offloads_table(esw);
+ esw_destroy_offloads_fdb_tables(esw);
+}
+
+static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+{
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ *mlx5_mode = SRIOV_LEGACY;
+ break;
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ *mlx5_mode = SRIOV_OFFLOADS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
+{
+ switch (mlx5_mode) {
+ case SRIOV_LEGACY:
+ *mode = DEVLINK_ESWITCH_MODE_LEGACY;
+ break;
+ case SRIOV_OFFLOADS:
+ *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
+{
+ switch (mode) {
+ case DEVLINK_ESWITCH_INLINE_MODE_NONE:
+ *mlx5_mode = MLX5_INLINE_MODE_NONE;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_LINK:
+ *mlx5_mode = MLX5_INLINE_MODE_L2;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
+ *mlx5_mode = MLX5_INLINE_MODE_IP;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
+ *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
+{
+ switch (mlx5_mode) {
+ case MLX5_INLINE_MODE_NONE:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
+ break;
+ case MLX5_INLINE_MODE_L2:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
+ break;
+ case MLX5_INLINE_MODE_IP:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
+ break;
+ case MLX5_INLINE_MODE_TCP_UDP:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_devlink_eswitch_check(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EOPNOTSUPP;
+
+ if(!MLX5_ESWITCH_MANAGER(dev))
+ return -EPERM;
+
+ if (dev->priv.eswitch->mode == SRIOV_NONE)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u16 cur_mlx5_mode, mlx5_mode = 0;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ cur_mlx5_mode = dev->priv.eswitch->mode;
+
+ if (esw_mode_from_devlink(mode, &mlx5_mode))
+ return -EINVAL;
+
+ if (cur_mlx5_mode == mlx5_mode)
+ return 0;
+
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ return esw_offloads_start(dev->priv.eswitch);
+ else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
+ return esw_offloads_stop(dev->priv.eswitch);
+ else
+ return -EINVAL;
+}
+
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
+}
+
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err, vport;
+ u8 mlx5_mode;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+ return 0;
+ /* fall through */
+ case MLX5_CAP_INLINE_MODE_L2:
+ esw_warn(dev, "Inline mode can't be set\n");
+ return -EOPNOTSUPP;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ break;
+ }
+
+ if (esw->offloads.num_flows > 0) {
+ esw_warn(dev, "Can't set inline mode when flows are configured\n");
+ return -EOPNOTSUPP;
+ }
+
+ err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+ if (err)
+ goto out;
+
+ for (vport = 1; vport < esw->enabled_vports; vport++) {
+ err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
+ if (err) {
+ esw_warn(dev, "Failed to set min inline on vport %d\n",
+ vport);
+ goto revert_inline_mode;
+ }
+ }
+
+ esw->offloads.inline_mode = mlx5_mode;
+ return 0;
+
+revert_inline_mode:
+ while (--vport > 0)
+ mlx5_modify_nic_vport_min_inline(dev,
+ vport,
+ esw->offloads.inline_mode);
+out:
+ return err;
+}
+
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+}
+
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+{
+ u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
+ struct mlx5_core_dev *dev = esw->dev;
+ int vport;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager))
+ return -EOPNOTSUPP;
+
+ if (esw->mode == SRIOV_NONE)
+ return -EOPNOTSUPP;
+
+ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ mlx5_mode = MLX5_INLINE_MODE_NONE;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_L2:
+ mlx5_mode = MLX5_INLINE_MODE_L2;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ goto query_vports;
+ }
+
+query_vports:
+ for (vport = 1; vport <= nvfs; vport++) {
+ mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
+ if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+ return -EINVAL;
+ prev_mlx5_mode = mlx5_mode;
+ }
+
+out:
+ *mode = mlx5_mode;
+ return 0;
+}
+
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
+ (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
+ return -EOPNOTSUPP;
+
+ if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
+ return -EOPNOTSUPP;
+
+ if (esw->mode == SRIOV_LEGACY) {
+ esw->offloads.encap = encap;
+ return 0;
+ }
+
+ if (esw->offloads.encap == encap)
+ return 0;
+
+ if (esw->offloads.num_flows > 0) {
+ esw_warn(dev, "Can't set encapsulation when flows are configured\n");
+ return -EOPNOTSUPP;
+ }
+
+ esw_destroy_offloads_fast_fdb_table(esw);
+
+ esw->offloads.encap = encap;
+ err = esw_create_offloads_fast_fdb_table(esw);
+ if (err) {
+ esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err);
+ esw->offloads.encap = !encap;
+ (void)esw_create_offloads_fast_fdb_table(esw);
+ }
+ return err;
+}
+
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ *encap = esw->offloads.encap;
+ return 0;
+}
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ struct mlx5_eswitch_rep_if *__rep_if,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep_if *rep_if;
+
+ rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type];
+
+ rep_if->load = __rep_if->load;
+ rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
+ rep_if->priv = __rep_if->priv;
+
+ rep_if->valid = true;
+}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
+
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index, u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ rep = &offloads->vport_reps[vport_index];
+
+ if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
+ rep->rep_if[rep_type].unload(rep);
+
+ rep->rep_if[rep_type].valid = false;
+}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
+
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
+{
+#define UPLINK_REP_INDEX 0
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ rep = &offloads->vport_reps[UPLINK_REP_INDEX];
+ return rep->rep_if[rep_type].priv;
+}
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ if (vport == FDB_UPLINK_VPORT)
+ vport = UPLINK_REP_INDEX;
+
+ rep = &offloads->vport_reps[vport];
+
+ if (rep->rep_if[rep_type].valid &&
+ rep->rep_if[rep_type].get_proto_dev)
+ return rep->rep_if[rep_type].get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport)
+{
+ return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
new file mode 100644
index 000000000..c0fd2212e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "fpga/cmd.h"
+
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+ MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+ void *buf, bool write)
+{
+ u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+ u32 out[MLX5_FPGA_ACCESS_REG_SZ];
+ int err;
+
+ if (size & 3)
+ return -EINVAL;
+ if (addr & 3)
+ return -EINVAL;
+ if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+ return -EINVAL;
+
+ MLX5_SET(fpga_access_reg, in, size, size);
+ MLX5_SET64(fpga_access_reg, in, address, addr);
+ if (write)
+ memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_ACCESS_REG, 0, write);
+ if (err)
+ return err;
+
+ if (!write)
+ memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size);
+
+ return 0;
+}
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga,
+ MLX5_ST_SZ_BYTES(fpga_cap),
+ MLX5_REG_FPGA_CAP, 0, 0);
+}
+
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+
+ MLX5_SET(fpga_ctrl, in, operation, op);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_CTRL, 0, true);
+}
+
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+ unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+ u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+ unsigned int read;
+ int ret = 0;
+
+ if (cap_size > size) {
+ mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u",
+ size, cap_size);
+ return -EINVAL;
+ }
+
+ while (cap_size > 0) {
+ read = min_t(unsigned int, cap_size,
+ MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+ ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+ if (ret) {
+ mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d",
+ read, addr, ret);
+ return ret;
+ }
+
+ cap_size -= read;
+ addr += read;
+ caps += read;
+ }
+
+ return ret;
+}
+
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_CTRL, 0, false);
+ if (err)
+ return err;
+
+ query->status = MLX5_GET(fpga_ctrl, out, status);
+ query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin);
+ query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
+ return 0;
+}
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+ u32 *fpga_qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+ int ret;
+
+ MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+ memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
+ MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc),
+ MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+ *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn);
+ return ret;
+}
+
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields,
+ void *fpga_qpc)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+
+ MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+ MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
+ MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn);
+ memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
+ MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+ u32 fpga_qpn, void *fpga_qpc)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+ int ret;
+
+ MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+ MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc),
+ MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc));
+ return ret;
+}
+
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+
+ MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
+ MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+ int ret;
+
+ MLX5_SET(fpga_query_qp_counters_in, in, opcode,
+ MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS);
+ MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
+ MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_ack_packets);
+ data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_send_packets);
+ data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_ack_packets);
+ data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_send_packets);
+ data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_total_drop);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
new file mode 100644
index 000000000..eb8b0fe0b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_H__
+#define __MLX5_FPGA_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_fpga_device_id {
+ MLX5_FPGA_DEVICE_UNKNOWN = 0,
+ MLX5_FPGA_DEVICE_KU040 = 1,
+ MLX5_FPGA_DEVICE_KU060 = 2,
+ MLX5_FPGA_DEVICE_KU060_2 = 3,
+};
+
+enum mlx5_fpga_image {
+ MLX5_FPGA_IMAGE_USER = 0,
+ MLX5_FPGA_IMAGE_FACTORY,
+};
+
+enum mlx5_fpga_status {
+ MLX5_FPGA_STATUS_SUCCESS = 0,
+ MLX5_FPGA_STATUS_FAILURE = 1,
+ MLX5_FPGA_STATUS_IN_PROGRESS = 2,
+ MLX5_FPGA_STATUS_NONE = 0xFFFF,
+};
+
+struct mlx5_fpga_query {
+ enum mlx5_fpga_image admin_image;
+ enum mlx5_fpga_image oper_image;
+ enum mlx5_fpga_status status;
+};
+
+enum mlx5_fpga_qpc_field_select {
+ MLX5_FPGA_QPC_STATE = BIT(0),
+};
+
+struct mlx5_fpga_qp_counters {
+ u64 rx_ack_packets;
+ u64 rx_send_packets;
+ u64 tx_ack_packets;
+ u64 tx_send_packets;
+ u64 rx_total_drop;
+};
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev);
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+ void *buf, bool write);
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size);
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+ u32 *fpga_qpn);
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc);
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc);
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data);
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn);
+
+#endif /* __MLX5_FPGA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
new file mode 100644
index 000000000..d8d0b6bd5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -0,0 +1,1047 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <net/addrconf.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/vport.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/conn.h"
+
+#define MLX5_FPGA_PKEY 0xFFFF
+#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */
+#define MLX5_FPGA_RECV_SIZE 2048
+#define MLX5_FPGA_PORT_NUM 1
+#define MLX5_FPGA_CQ_BUDGET 64
+
+static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct device *dma_device;
+ int err = 0;
+
+ if (unlikely(!buf->sg[0].data))
+ goto out;
+
+ dma_device = &conn->fdev->mdev->pdev->dev;
+ buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data,
+ buf->sg[0].size, buf->dma_dir);
+ err = dma_mapping_error(dma_device, buf->sg[0].dma_addr);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (!buf->sg[1].data)
+ goto out;
+
+ buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data,
+ buf->sg[1].size, buf->dma_dir);
+ err = dma_mapping_error(dma_device, buf->sg[1].dma_addr);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err);
+ dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+ buf->sg[0].size, buf->dma_dir);
+ err = -ENOMEM;
+ }
+
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct device *dma_device;
+
+ dma_device = &conn->fdev->mdev->pdev->dev;
+ if (buf->sg[1].data)
+ dma_unmap_single(dma_device, buf->sg[1].dma_addr,
+ buf->sg[1].size, buf->dma_dir);
+
+ if (likely(buf->sg[0].data))
+ dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+ buf->sg[0].size, buf->dma_dir);
+}
+
+static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix;
+ int err = 0;
+
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (unlikely(err))
+ goto out;
+
+ if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ return -EBUSY;
+ }
+
+ ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
+ data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
+ data->byte_count = cpu_to_be32(buf->sg[0].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->addr = cpu_to_be64(buf->sg[0].dma_addr);
+
+ conn->qp.rq.pc++;
+ conn->qp.rq.bufs[ix] = buf;
+
+ /* Make sure that descriptors are written before doorbell record. */
+ dma_wmb();
+ *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff);
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe)
+{
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+ *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
+ /* Make sure that doorbell record is visible before ringing */
+ wmb();
+ mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+}
+
+static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix, sgi;
+ int size = 1;
+
+ ix = conn->qp.sq.pc & (conn->qp.sq.size - 1);
+
+ ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix);
+ data = (void *)(ctrl + 1);
+
+ for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) {
+ if (!buf->sg[sgi].data)
+ break;
+ data->byte_count = cpu_to_be32(buf->sg[sgi].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
+ data++;
+ size++;
+ }
+
+ ctrl->imm = 0;
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
+ MLX5_OPCODE_SEND);
+ ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8));
+
+ conn->qp.sq.pc++;
+ conn->qp.sq.bufs[ix] = buf;
+ mlx5_fpga_conn_notify_hw(conn, ctrl);
+}
+
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ unsigned long flags;
+ int err;
+
+ if (!conn->qp.active)
+ return -ENOTCONN;
+
+ buf->dma_dir = DMA_TO_DEVICE;
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (err)
+ return err;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+ if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) {
+ list_add_tail(&buf->list, &conn->qp.sq.backlog);
+ goto out_unlock;
+ }
+
+ mlx5_fpga_conn_post_send(conn, buf);
+
+out_unlock:
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+ return err;
+}
+
+static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int err;
+
+ buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->sg[0].data = (void *)(buf + 1);
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ buf->dma_dir = DMA_FROM_DEVICE;
+
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (err)
+ kfree(buf);
+
+ return err;
+}
+
+static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ struct mlx5_core_mkey *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int ix, err;
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1);
+ buf = conn->qp.rq.bufs[ix];
+ conn->qp.rq.bufs[ix] = NULL;
+ conn->qp.rq.cc++;
+
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (unlikely(status || !conn->qp.active)) {
+ conn->qp.active = false;
+ kfree(buf);
+ return;
+ }
+
+ buf->sg[0].size = be32_to_cpu(cqe->byte_cnt);
+ mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n",
+ buf->sg[0].size);
+ conn->recv_cb(conn->cb_arg, buf);
+
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev,
+ "Failed to re-post recv buf: %d\n", err);
+ kfree(buf);
+ }
+}
+
+static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf, *nextbuf;
+ unsigned long flags;
+ int ix;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1);
+ buf = conn->qp.sq.bufs[ix];
+ conn->qp.sq.bufs[ix] = NULL;
+ conn->qp.sq.cc++;
+
+ /* Handle backlog still under the spinlock to ensure message post order */
+ if (unlikely(!list_empty(&conn->qp.sq.backlog))) {
+ if (likely(conn->qp.active)) {
+ nextbuf = list_first_entry(&conn->qp.sq.backlog,
+ struct mlx5_fpga_dma_buf, list);
+ list_del(&nextbuf->list);
+ mlx5_fpga_conn_post_send(conn, nextbuf);
+ }
+ }
+
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (likely(buf->complete))
+ buf->complete(conn, conn->fdev, buf, status);
+
+ if (unlikely(status))
+ conn->qp.active = false;
+}
+
+static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe)
+{
+ u8 opcode, status = 0;
+
+ opcode = cqe->op_own >> 4;
+
+ switch (opcode) {
+ case MLX5_CQE_REQ_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ /* Fall through */
+ case MLX5_CQE_REQ:
+ mlx5_fpga_conn_sq_cqe(conn, cqe, status);
+ break;
+
+ case MLX5_CQE_RESP_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ /* Fall through */
+ case MLX5_CQE_RESP_SEND:
+ mlx5_fpga_conn_rq_cqe(conn, cqe, status);
+ break;
+ default:
+ mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n",
+ opcode);
+ }
+}
+
+static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
+{
+ mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT,
+ conn->fdev->conn_res.uar->map, conn->cq.wq.cc);
+}
+
+static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq,
+ enum mlx5_event event)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+ mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn);
+}
+
+static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp);
+ mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn);
+}
+
+static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
+ unsigned int budget)
+{
+ struct mlx5_cqe64 *cqe;
+
+ while (budget) {
+ cqe = mlx5_cqwq_get_cqe(&conn->cq.wq);
+ if (!cqe)
+ break;
+
+ budget--;
+ mlx5_cqwq_pop(&conn->cq.wq);
+ mlx5_fpga_conn_handle_cqe(conn, cqe);
+ mlx5_cqwq_update_db_record(&conn->cq.wq);
+ }
+ if (!budget) {
+ tasklet_schedule(&conn->cq.tasklet);
+ return;
+ }
+
+ mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc);
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+ mlx5_fpga_conn_arm_cq(conn);
+}
+
+static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+{
+ struct mlx5_fpga_conn *conn = (void *)data;
+
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+ struct mlx5_wq_param wqp;
+ struct mlx5_cqe64 *cqe;
+ int inlen, err, eqn;
+ unsigned int irqn;
+ void *cqc, *in;
+ __be64 *pas;
+ u32 i;
+
+ cq_size = roundup_pow_of_two(cq_size);
+ MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size));
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq,
+ &conn->cq.wq_ctrl);
+ if (err)
+ return err;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) {
+ cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i);
+ cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * conn->cq.wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_cqwq;
+ }
+
+ err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+ if (err) {
+ kvfree(in);
+ goto err_cqwq;
+ }
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+ mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
+
+ err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+ kvfree(in);
+
+ if (err)
+ goto err_cqwq;
+
+ conn->cq.mcq.cqe_sz = 64;
+ conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db;
+ conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1;
+ *conn->cq.mcq.set_ci_db = 0;
+ *conn->cq.mcq.arm_db = 0;
+ conn->cq.mcq.vector = 0;
+ conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
+ conn->cq.mcq.event = mlx5_fpga_conn_cq_event;
+ conn->cq.mcq.irqn = irqn;
+ conn->cq.mcq.uar = fdev->conn_res.uar;
+ tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
+ (unsigned long)conn);
+
+ mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
+
+ goto out;
+
+err_cqwq:
+ mlx5_wq_destroy(&conn->cq.wq_ctrl);
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
+{
+ tasklet_disable(&conn->cq.tasklet);
+ tasklet_kill(&conn->cq.tasklet);
+ mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
+ mlx5_wq_destroy(&conn->cq.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ struct mlx5_wq_param wqp;
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq,
+ &conn->qp.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
+ unsigned int tx_size, unsigned int rx_size)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0};
+ void *in = NULL, *qpc;
+ int err, inlen;
+
+ conn->qp.rq.pc = 0;
+ conn->qp.rq.cc = 0;
+ conn->qp.rq.size = roundup_pow_of_two(rx_size);
+ conn->qp.sq.pc = 0;
+ conn->qp.sq.cc = 0;
+ conn->qp.sq.size = roundup_pow_of_two(tx_size);
+
+ MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ err = mlx5_fpga_conn_create_wq(conn, temp_qpc);
+ if (err)
+ goto out;
+
+ conn->qp.rq.bufs = kvcalloc(conn->qp.rq.size,
+ sizeof(conn->qp.rq.bufs[0]),
+ GFP_KERNEL);
+ if (!conn->qp.rq.bufs) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ conn->qp.sq.bufs = kvcalloc(conn->qp.sq.size,
+ sizeof(conn->qp.sq.bufs[0]),
+ GFP_KERNEL);
+ if (!conn->qp.sq.bufs) {
+ err = -ENOMEM;
+ goto err_rq_bufs;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+ conn->qp.wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_sq_bufs;
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index);
+ MLX5_SET(qpc, qpc, log_page_size,
+ conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, fre, 1);
+ MLX5_SET(qpc, qpc, rlky, 1);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn);
+ MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+ MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+
+ mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+
+ err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
+ if (err)
+ goto err_sq_bufs;
+
+ conn->qp.mqp.event = mlx5_fpga_conn_event;
+ mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
+
+ goto out;
+
+err_sq_bufs:
+ kvfree(conn->qp.sq.bufs);
+err_rq_bufs:
+ kvfree(conn->qp.rq.bufs);
+err_wq:
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+out:
+ kvfree(in);
+ return err;
+}
+
+static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn)
+{
+ int ix;
+
+ for (ix = 0; ix < conn->qp.rq.size; ix++) {
+ if (!conn->qp.rq.bufs[ix])
+ continue;
+ mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]);
+ kfree(conn->qp.rq.bufs[ix]);
+ conn->qp.rq.bufs[ix] = NULL;
+ }
+}
+
+static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf, *temp;
+ int ix;
+
+ for (ix = 0; ix < conn->qp.sq.size; ix++) {
+ buf = conn->qp.sq.bufs[ix];
+ if (!buf)
+ continue;
+ conn->qp.sq.bufs[ix] = NULL;
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+ list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+}
+
+static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
+{
+ mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp);
+ mlx5_fpga_conn_free_recv_bufs(conn);
+ mlx5_fpga_conn_flush_send_bufs(conn);
+ kvfree(conn->qp.sq.bufs);
+ kvfree(conn->qp.rq.bufs);
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+}
+
+static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_core_dev *mdev = conn->fdev->mdev;
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
+
+ return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
+ &conn->qp.mqp);
+}
+
+static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc = NULL;
+ int err;
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
+
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
+ MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn);
+ MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
+ &conn->qp.mqp);
+ if (err) {
+ mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+ goto out;
+ }
+
+out:
+ kfree(qpc);
+ return err;
+}
+
+static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc = NULL;
+ int err;
+
+ mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
+
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
+ MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
+ MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn);
+ MLX5_SET(qpc, qpc, next_rcv_psn,
+ MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
+ ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+ MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
+ MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+ MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port));
+ MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+ conn->qp.sgid_index);
+ MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0);
+ memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+ MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
+ MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
+
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
+ &conn->qp.mqp);
+ if (err) {
+ mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+ goto out;
+ }
+
+out:
+ kfree(qpc);
+ return err;
+}
+
+static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc = NULL;
+ u32 opt_mask;
+ int err;
+
+ mlx5_fpga_dbg(conn->fdev, "QP RTS\n");
+
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
+ MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
+ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */
+ MLX5_SET(qpc, qpc, next_send_psn,
+ MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn));
+ MLX5_SET(qpc, qpc, retry_count, 7);
+ MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+
+ opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT;
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc,
+ &conn->qp.mqp);
+ if (err) {
+ mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+ goto out;
+ }
+
+out:
+ kfree(qpc);
+ return err;
+}
+
+static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ int err;
+
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE);
+ err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err);
+ goto out;
+ }
+
+ err = mlx5_fpga_conn_reset_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state to reset\n");
+ goto err_fpga_qp;
+ }
+
+ err = mlx5_fpga_conn_init_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n");
+ goto err_fpga_qp;
+ }
+ conn->qp.active = true;
+
+ while (!mlx5_fpga_conn_post_recv_buf(conn))
+ ;
+
+ err = mlx5_fpga_conn_rtr_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n");
+ goto err_recv_bufs;
+ }
+
+ err = mlx5_fpga_conn_rts_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n");
+ goto err_recv_bufs;
+ }
+ goto out;
+
+err_recv_bufs:
+ mlx5_fpga_conn_free_recv_bufs(conn);
+err_fpga_qp:
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc))
+ mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n");
+out:
+ return err;
+}
+
+struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type)
+{
+ struct mlx5_fpga_conn *ret, *conn;
+ u8 *remote_mac, *remote_ip;
+ int err;
+
+ if (!attr->recv_cb)
+ return ERR_PTR(-EINVAL);
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
+ conn->fdev = fdev;
+ INIT_LIST_HEAD(&conn->qp.sq.backlog);
+
+ spin_lock_init(&conn->qp.sq.lock);
+
+ conn->recv_cb = attr->recv_cb;
+ conn->cb_arg = attr->cb_arg;
+
+ remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
+ err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ /* Build Modified EUI-64 IPv6 address from the MAC address */
+ remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip);
+ remote_ip[0] = 0xfe;
+ remote_ip[1] = 0x80;
+ addrconf_addr_eui48(&remote_ip[8], remote_mac);
+
+ err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
+ MLX5_ROCE_VERSION_2,
+ MLX5_ROCE_L3_TYPE_IPV6,
+ remote_ip, remote_mac, true, 0,
+ MLX5_FPGA_PORT_NUM);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_rsvd_gid;
+ }
+ mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index);
+
+ /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe,
+ * created during processing of the cqe
+ */
+ err = mlx5_fpga_conn_create_cq(conn,
+ (attr->tx_size + attr->rx_size) * 2);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_gid;
+ }
+
+ mlx5_fpga_conn_arm_cq(conn);
+
+ err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_cq;
+ }
+
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
+
+ err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc,
+ &conn->fpga_qpn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_qp;
+ }
+
+ err = mlx5_fpga_conn_connect(conn);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_conn;
+ }
+
+ mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn);
+ ret = conn;
+ goto out;
+
+err_conn:
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+err_qp:
+ mlx5_fpga_conn_destroy_qp(conn);
+err_cq:
+ mlx5_fpga_conn_destroy_cq(conn);
+err_gid:
+ mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
+ NULL, false, 0, MLX5_FPGA_PORT_NUM);
+err_rsvd_gid:
+ mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
+err:
+ kfree(conn);
+out:
+ return ret;
+}
+
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ int err = 0;
+
+ conn->qp.active = false;
+ tasklet_disable(&conn->cq.tasklet);
+ synchronize_irq(conn->cq.mcq.irqn);
+
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL,
+ &conn->qp.mqp);
+ if (err)
+ mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err);
+ mlx5_fpga_conn_destroy_qp(conn);
+ mlx5_fpga_conn_destroy_cq(conn);
+
+ mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0,
+ NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM);
+ mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index);
+ kfree(conn);
+}
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
+{
+ int err;
+
+ err = mlx5_nic_vport_enable_roce(fdev->mdev);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err);
+ goto out;
+ }
+
+ fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev);
+ if (IS_ERR(fdev->conn_res.uar)) {
+ err = PTR_ERR(fdev->conn_res.uar);
+ mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err);
+ goto err_roce;
+ }
+ mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n",
+ fdev->conn_res.uar->index);
+
+ err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn);
+ if (err) {
+ mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err);
+ goto err_uar;
+ }
+ mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn);
+
+ err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn,
+ &fdev->conn_res.mkey);
+ if (err) {
+ mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
+ goto err_dealloc_pd;
+ }
+ mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+
+ return 0;
+
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+err_uar:
+ mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+err_roce:
+ mlx5_nic_vport_disable_roce(fdev->mdev);
+out:
+ return err;
+}
+
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
+{
+ mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey);
+ mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+ mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+ mlx5_nic_vport_disable_roce(fdev->mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
new file mode 100644
index 000000000..634ae10e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_CONN_H__
+#define __MLX5_FPGA_CONN_H__
+
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+#include "fpga/core.h"
+#include "fpga/sdk.h"
+#include "wq.h"
+
+struct mlx5_fpga_conn {
+ struct mlx5_fpga_device *fdev;
+
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ void *cb_arg;
+
+ /* FPGA QP */
+ u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)];
+ u32 fpga_qpn;
+
+ /* CQ */
+ struct {
+ struct mlx5_cqwq wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_cq mcq;
+ struct tasklet_struct tasklet;
+ } cq;
+
+ /* QP */
+ struct {
+ bool active;
+ int sgid_index;
+ struct mlx5_wq_qp wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_qp mqp;
+ struct {
+ spinlock_t lock; /* Protects all SQ state */
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ struct list_head backlog;
+ } sq;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ } rq;
+ } qp;
+};
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev);
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev);
+struct mlx5_fpga_conn *
+mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type);
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn);
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+#endif /* __MLX5_FPGA_CONN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
new file mode 100644
index 000000000..310f9e7d8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/core.h"
+#include "fpga/conn.h"
+
+static const char *const mlx5_fpga_error_strings[] = {
+ "Null Syndrome",
+ "Corrupted DDR",
+ "Flash Timeout",
+ "Internal Link Error",
+ "Watchdog HW Failure",
+ "I2C Failure",
+ "Image Changed",
+ "Temperature Critical",
+};
+
+static const char * const mlx5_fpga_qp_error_strings[] = {
+ "Null Syndrome",
+ "Retry Counter Expired",
+ "RNR Expired",
+};
+static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
+{
+ struct mlx5_fpga_device *fdev = NULL;
+
+ fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
+ if (!fdev)
+ return NULL;
+
+ spin_lock_init(&fdev->state_lock);
+ fdev->state = MLX5_FPGA_STATUS_NONE;
+ return fdev;
+}
+
+static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
+{
+ switch (image) {
+ case MLX5_FPGA_IMAGE_USER:
+ return "user";
+ case MLX5_FPGA_IMAGE_FACTORY:
+ return "factory";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *mlx5_fpga_device_name(u32 device)
+{
+ switch (device) {
+ case MLX5_FPGA_DEVICE_KU040:
+ return "ku040";
+ case MLX5_FPGA_DEVICE_KU060:
+ return "ku060";
+ case MLX5_FPGA_DEVICE_KU060_2:
+ return "ku060_2";
+ case MLX5_FPGA_DEVICE_UNKNOWN:
+ default:
+ return "unknown";
+ }
+}
+
+static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
+{
+ struct mlx5_fpga_query query;
+ int err;
+
+ err = mlx5_fpga_query(fdev->mdev, &query);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
+ return err;
+ }
+
+ fdev->last_admin_image = query.admin_image;
+ fdev->last_oper_image = query.oper_image;
+
+ mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
+ query.status, query.admin_image, query.oper_image);
+
+ if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
+ mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
+ mlx5_fpga_image_name(fdev->last_oper_image),
+ query.status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+ int err;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
+ return err;
+ }
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
+ return err;
+ }
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
+ return err;
+ }
+ return 0;
+}
+
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int max_num_qps;
+ unsigned long flags;
+ u32 fpga_device_id;
+ int err;
+
+ if (!fdev)
+ return 0;
+
+ err = mlx5_fpga_device_load_check(fdev);
+ if (err)
+ goto out;
+
+ err = mlx5_fpga_caps(fdev->mdev);
+ if (err)
+ goto out;
+
+ fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
+ mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
+ mlx5_fpga_device_name(fpga_device_id),
+ fpga_device_id,
+ mlx5_fpga_image_name(fdev->last_oper_image),
+ MLX5_CAP_FPGA(fdev->mdev, image_version),
+ MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
+ MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
+ MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
+
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ if (!max_num_qps) {
+ mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
+ err = -ENOTSUPP;
+ goto out;
+ }
+
+ err = mlx5_core_reserve_gids(mdev, max_num_qps);
+ if (err)
+ goto out;
+
+ err = mlx5_fpga_conn_device_init(fdev);
+ if (err)
+ goto err_rsvd_gid;
+
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_device_brb(fdev);
+ if (err)
+ goto err_conn_init;
+ }
+
+ goto out;
+
+err_conn_init:
+ mlx5_fpga_conn_device_cleanup(fdev);
+
+err_rsvd_gid:
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
+out:
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ return err;
+}
+
+int mlx5_fpga_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = NULL;
+
+ if (!MLX5_CAP_GEN(mdev, fpga)) {
+ mlx5_core_dbg(mdev, "FPGA capability not present\n");
+ return 0;
+ }
+
+ mlx5_core_dbg(mdev, "Initializing FPGA\n");
+
+ fdev = mlx5_fpga_device_alloc();
+ if (!fdev)
+ return -ENOMEM;
+
+ fdev->mdev = mdev;
+ mdev->fpga = fdev;
+
+ return 0;
+}
+
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int max_num_qps;
+ unsigned long flags;
+ int err;
+
+ if (!fdev)
+ return;
+
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ return;
+ }
+ fdev->state = MLX5_FPGA_STATUS_NONE;
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+ if (err)
+ mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
+ err);
+ }
+
+ mlx5_fpga_conn_device_cleanup(fdev);
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ mlx5_fpga_device_stop(mdev);
+ kfree(fdev);
+ mdev->fpga = NULL;
+}
+
+static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
+{
+ if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
+ return mlx5_fpga_error_strings[syndrome];
+ return "Unknown";
+}
+
+static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
+{
+ if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
+ return mlx5_fpga_qp_error_strings[syndrome];
+ return "Unknown";
+}
+
+void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ const char *event_name;
+ bool teardown = false;
+ unsigned long flags;
+ u8 syndrome;
+
+ switch (event) {
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ syndrome = MLX5_GET(fpga_error_event, data, syndrome);
+ event_name = mlx5_fpga_syndrome_to_string(syndrome);
+ break;
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
+ event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
+ break;
+ default:
+ mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
+ event);
+ return;
+ }
+
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ switch (fdev->state) {
+ case MLX5_FPGA_STATUS_SUCCESS:
+ mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
+ teardown = true;
+ break;
+ default:
+ mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
+ syndrome, event_name);
+ }
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ /* We tear-down the card's interfaces and functionality because
+ * the FPGA bump-on-the-wire is misbehaving and we lose ability
+ * to communicate with the network. User may still be able to
+ * recover by re-programming or debugging the FPGA
+ */
+ if (teardown)
+ mlx5_trigger_health_work(fdev->mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
new file mode 100644
index 000000000..3e2355c8d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_CORE_H__
+#define __MLX5_FPGA_CORE_H__
+
+#ifdef CONFIG_MLX5_FPGA
+
+#include "fpga/cmd.h"
+
+/* Represents an Innova device */
+struct mlx5_fpga_device {
+ struct mlx5_core_dev *mdev;
+ spinlock_t state_lock; /* Protects state transitions */
+ enum mlx5_fpga_status state;
+ enum mlx5_fpga_image last_admin_image;
+ enum mlx5_fpga_image last_oper_image;
+
+ /* QP Connection resources */
+ struct {
+ u32 pdn;
+ struct mlx5_core_mkey mkey;
+ struct mlx5_uars_page *uar;
+ } conn_res;
+
+ struct mlx5_fpga_ipsec *ipsec;
+ struct mlx5_fpga_tls *tls;
+};
+
+#define mlx5_fpga_dbg(__adev, format, ...) \
+ dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_err(__adev, format, ...) \
+ dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn(__adev, format, ...) \
+ dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \
+ dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \
+ format, __func__, __LINE__, ##__VA_ARGS__)
+
+#define mlx5_fpga_notice(__adev, format, ...) \
+ dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+
+#define mlx5_fpga_info(__adev, format, ...) \
+ dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+
+int mlx5_fpga_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
+void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
+
+#else
+
+static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
+ void *data)
+{
+}
+
+#endif
+
+#endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
new file mode 100644
index 000000000..715ccafc9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -0,0 +1,1533 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/rhashtable.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
+
+#include "mlx5_core.h"
+#include "fs_cmd.h"
+#include "fpga/ipsec.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+
+enum mlx5_fpga_ipsec_cmd_status {
+ MLX5_FPGA_IPSEC_CMD_PENDING,
+ MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
+ MLX5_FPGA_IPSEC_CMD_COMPLETE,
+};
+
+struct mlx5_fpga_ipsec_cmd_context {
+ struct mlx5_fpga_dma_buf buf;
+ enum mlx5_fpga_ipsec_cmd_status status;
+ struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
+ int status_code;
+ struct completion complete;
+ struct mlx5_fpga_device *dev;
+ struct list_head list; /* Item in pending_cmds */
+ u8 command[0];
+};
+
+struct mlx5_fpga_esp_xfrm;
+
+struct mlx5_fpga_ipsec_sa_ctx {
+ struct rhash_head hash;
+ struct mlx5_ifc_fpga_ipsec_sa hw_sa;
+ struct mlx5_core_dev *dev;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+};
+
+struct mlx5_fpga_esp_xfrm {
+ unsigned int num_rules;
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mutex lock; /* xfrm lock */
+ struct mlx5_accel_esp_xfrm accel_xfrm;
+};
+
+struct mlx5_fpga_ipsec_rule {
+ struct rb_node node;
+ struct fs_fte *fte;
+ struct mlx5_fpga_ipsec_sa_ctx *ctx;
+};
+
+static const struct rhashtable_params rhash_sa = {
+ /* Keep out "cmd" field from the key as it's
+ * value is not constant during the lifetime
+ * of the key object.
+ */
+ .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
+ FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
+ .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) +
+ FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
+ .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+struct mlx5_fpga_ipsec {
+ struct mlx5_fpga_device *fdev;
+ struct list_head pending_cmds;
+ spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+ u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
+ struct mlx5_fpga_conn *conn;
+
+ struct notifier_block fs_notifier_ingress_bypass;
+ struct notifier_block fs_notifier_egress;
+
+ /* Map hardware SA --> SA context
+ * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx)
+ * We will use this hash to avoid SAs duplication in fpga which
+ * aren't allowed
+ */
+ struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
+ struct mutex sa_hash_lock;
+
+ /* Tree holding all rules for this fpga device
+ * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
+ */
+ struct rb_root rules_rb;
+ struct mutex rules_rb_lock; /* rules lock */
+};
+
+static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+ MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC)
+ return false;
+
+ return true;
+}
+
+static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf,
+ u8 status)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+
+ if (status) {
+ context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
+ buf);
+ mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
+ status);
+ context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
+ complete(&context->complete);
+ }
+}
+
+static inline
+int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
+{
+ switch (syndrome) {
+ case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
+ return 0;
+ case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
+ return -EEXIST;
+ case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+ return -EINVAL;
+ case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+ return -EIO;
+ }
+ return -EIO;
+}
+
+static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
+ struct mlx5_fpga_device *fdev = cb_arg;
+ unsigned long flags;
+
+ if (buf->sg[0].size < sizeof(*resp)) {
+ mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
+ buf->sg[0].size, sizeof(*resp));
+ return;
+ }
+
+ mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
+ ntohl(resp->syndrome));
+
+ spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+ context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
+ struct mlx5_fpga_ipsec_cmd_context,
+ list);
+ if (context)
+ list_del(&context->list);
+ spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+ if (!context) {
+ mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
+ return;
+ }
+ mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
+
+ syndrome = ntohl(resp->syndrome);
+ context->status_code = syndrome_to_errno(syndrome);
+ context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
+ memcpy(&context->resp, resp, sizeof(*resp));
+
+ if (context->status_code)
+ mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
+ syndrome);
+
+ complete(&context->complete);
+}
+
+static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
+ const void *cmd, int cmd_size)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned long flags;
+ int res;
+
+ if (!fdev || !fdev->ipsec)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (cmd_size & 3)
+ return ERR_PTR(-EINVAL);
+
+ context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
+ context->dev = fdev;
+ context->buf.complete = mlx5_fpga_ipsec_send_complete;
+ init_completion(&context->complete);
+ memcpy(&context->command, cmd, cmd_size);
+ context->buf.sg[0].size = cmd_size;
+ context->buf.sg[0].data = &context->command;
+
+ spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+ res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+ if (!res)
+ list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+ spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+ if (res) {
+ mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res);
+ kfree(context);
+ return ERR_PTR(res);
+ }
+
+ /* Context should be freed by the caller after completion. */
+ return context;
+}
+
+static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context = ctx;
+ unsigned long timeout =
+ msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);
+ int res;
+
+ res = wait_for_completion_timeout(&context->complete, timeout);
+ if (!res) {
+ mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
+ return -ETIMEDOUT;
+ }
+
+ if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
+ res = context->status_code;
+ else
+ res = -EIO;
+
+ return res;
+}
+
+static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
+{
+ if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
+ return true;
+ return false;
+}
+
+static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
+ int opcode)
+{
+ struct mlx5_core_dev *dev = fdev->mdev;
+ struct mlx5_ifc_fpga_ipsec_sa *sa;
+ struct mlx5_fpga_ipsec_cmd_context *cmd_context;
+ size_t sa_cmd_size;
+ int err;
+
+ hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
+ if (is_v2_sadb_supported(fdev->ipsec))
+ sa_cmd_size = sizeof(*hw_sa);
+ else
+ sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
+
+ cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
+ mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
+ if (IS_ERR(cmd_context))
+ return PTR_ERR(cmd_context);
+
+ err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
+ if (err)
+ goto out;
+
+ sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
+ if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
+ mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+ ntohl(sa->ipsec_sa_v1.sw_sa_handle),
+ ntohl(cmd_context->resp.sw_sa_handle));
+ err = -EIO;
+ }
+
+out:
+ kfree(cmd_context);
+ return err;
+}
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ u32 ret = 0;
+
+ if (mlx5_fpga_is_ipsec_device(mdev)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
+ ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
+ } else {
+ return ret;
+ }
+
+ if (!fdev->ipsec)
+ return ret;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
+ ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
+ ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
+ ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
+ ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
+ }
+
+ return ret;
+}
+
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ if (!fdev || !fdev->ipsec)
+ return 0;
+
+ return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+ number_of_ipsec_counters);
+}
+
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int counters_count)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int i;
+ __be32 *data;
+ u32 count;
+ u64 addr;
+ int ret;
+
+ if (!fdev || !fdev->ipsec)
+ return 0;
+
+ addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+ ipsec_counters_addr_low) +
+ ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+ ipsec_counters_addr_high) << 32);
+
+ count = mlx5_fpga_ipsec_counters_count(mdev);
+
+ data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
+ MLX5_FPGA_ACCESS_TYPE_DONTCARE);
+ if (ret < 0) {
+ mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
+ ret);
+ goto out;
+ }
+ ret = 0;
+
+ if (count > counters_count)
+ count = counters_count;
+
+ /* Each counter is low word, then high. But each word is big-endian */
+ for (i = 0; i < count; i++)
+ counters[i] = (u64)ntohl(data[i * 2]) |
+ ((u64)ntohl(data[i * 2 + 1]) << 32);
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
+ int err;
+
+ cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
+ cmd.flags = htonl(flags);
+ context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
+ if (IS_ERR(context))
+ return PTR_ERR(context);
+
+ err = mlx5_fpga_ipsec_cmd_wait(context);
+ if (err)
+ goto out;
+
+ if ((context->resp.flags & cmd.flags) != cmd.flags) {
+ mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
+ cmd.flags,
+ context->resp.flags);
+ err = -EIO;
+ }
+
+out:
+ kfree(context);
+ return err;
+}
+
+static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
+{
+ u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
+ u32 flags = 0;
+
+ if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
+ flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
+
+ return mlx5_fpga_ipsec_set_caps(mdev, flags);
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
+
+ /* key */
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
+ aes_gcm->key_len / 8);
+ /* Duplicate 128 bit key twice according to HW layout */
+ if (aes_gcm->key_len == 128)
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
+ aes_gcm->aes_key, aes_gcm->key_len / 8);
+
+ /* salt and seq_iv */
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
+ sizeof(aes_gcm->seq_iv));
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
+ sizeof(aes_gcm->salt));
+
+ /* esn */
+ if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags |=
+ (xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+ hw_sa->esn = htonl(xfrm_attrs->esn);
+ } else {
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags &=
+ ~(xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+ hw_sa->esn = 0;
+ }
+
+ /* rx handle */
+ hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
+
+ /* enc mode */
+ switch (aes_gcm->key_len) {
+ case 128:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
+ break;
+ case 256:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
+ break;
+ }
+
+ /* flags */
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
+ MLX5_FPGA_IPSEC_SA_SPI_EN |
+ MLX5_FPGA_IPSEC_SA_IP_ESP;
+
+ if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
+ else
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
+
+ /* IPs */
+ memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
+ memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
+
+ /* SPI */
+ hw_sa->ipsec_sa_v1.spi = spi;
+
+ /* flags */
+ if (is_ipv6)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
+}
+
+static bool is_full_mask(const void *p, size_t len)
+{
+ WARN_ON(len % 4);
+
+ return !memchr_inv(p, 0xff, len);
+}
+
+static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ misc_parameters);
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param,
+ match_v,
+ outer_headers);
+
+ if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
+ const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)) ||
+ !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)))
+ return false;
+ } else {
+ const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+
+ if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)) ||
+ !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)))
+ return false;
+ }
+
+ if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ outer_esp_spi),
+ MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
+ bool ipv6_flow;
+
+ ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
+
+ if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
+ mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
+ mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
+ mlx5_fs_is_vxlan_flow(match_c) ||
+ !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
+ ipv6_flow))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
+ mlx5_fs_is_outer_ipsec_flow(match_c))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
+ ipv6_flow)
+ return false;
+
+ if (!validate_fpga_full_mask(dev, match_c, match_v))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v,
+ struct mlx5_flow_act *flow_act)
+{
+ const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
+ bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
+ int ret;
+
+ ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
+ match_v);
+ if (!ret)
+ return ret;
+
+ if (is_dmac || is_smac ||
+ (match_criteria_enable &
+ ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
+ (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
+ flow_act->has_flow_tag)
+ return false;
+
+ return true;
+}
+
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(accel_xfrm, typeof(*fpga_xfrm),
+ accel_xfrm);
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode, err;
+ void *context;
+
+ /* alloc SA */
+ sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
+ if (!sa_ctx)
+ return ERR_PTR(-ENOMEM);
+
+ sa_ctx->dev = mdev;
+
+ /* build candidate SA */
+ mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
+ saddr, daddr, spi, is_ipv6,
+ &sa_ctx->hw_sa);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */
+ /* all rules must be with same IPs and SPI */
+ if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
+ sizeof(sa_ctx->hw_sa))) {
+ context = ERR_PTR(-EINVAL);
+ goto exists;
+ }
+
+ ++fpga_xfrm->num_rules;
+ context = fpga_xfrm->sa_ctx;
+ goto exists;
+ }
+
+ /* This is unbounded fpga_xfrm, try to add to hash */
+ mutex_lock(&fipsec->sa_hash_lock);
+
+ err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa);
+ if (err) {
+ /* Can't bound different accel_xfrm to already existing sa_ctx.
+ * This is because we can't support multiple ketmats for
+ * same IPs and SPI
+ */
+ context = ERR_PTR(-EEXIST);
+ goto unlock_hash;
+ }
+
+ /* Bound accel_xfrm to sa_ctx */
+ opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err) {
+ context = ERR_PTR(err);
+ goto delete_hash;
+ }
+
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+ ++fpga_xfrm->num_rules;
+ fpga_xfrm->sa_ctx = sa_ctx;
+ sa_ctx->fpga_xfrm = fpga_xfrm;
+
+ mutex_unlock(&fpga_xfrm->lock);
+
+ return sa_ctx;
+
+delete_hash:
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+unlock_hash:
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+exists:
+ mutex_unlock(&fpga_xfrm->lock);
+ kfree(sa_ctx);
+ return context;
+}
+
+static void *
+mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ struct mlx5_accel_esp_xfrm *accel_xfrm;
+ __be32 saddr[4], daddr[4], spi;
+ struct mlx5_flow_group *fg;
+ bool is_ipv6 = false;
+
+ fs_get_obj(fg, fte->node.parent);
+ /* validate */
+ if (is_egress &&
+ !mlx5_is_fpga_egress_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val,
+ &fte->action))
+ return ERR_PTR(-EINVAL);
+ else if (!mlx5_is_fpga_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val))
+ return ERR_PTR(-EINVAL);
+
+ /* get xfrm context */
+ accel_xfrm =
+ (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
+
+ /* IPs */
+ if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
+ fte->val)) {
+ memcpy(&saddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ sizeof(saddr[3]));
+ memcpy(&daddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ sizeof(daddr[3]));
+ } else {
+ memcpy(saddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(saddr));
+ memcpy(daddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(daddr));
+ is_ipv6 = true;
+ }
+
+ /* SPI */
+ spi = MLX5_GET_BE(typeof(spi),
+ fte_match_param, fte->val,
+ misc_parameters.outer_esp_spi);
+
+ /* create */
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
+ saddr, daddr,
+ spi, is_ipv6);
+}
+
+static void
+mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
+{
+ struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
+ int err;
+
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err) {
+ WARN_ON(err);
+ return;
+ }
+
+ mutex_lock(&fipsec->sa_hash_lock);
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+ mutex_unlock(&fipsec->sa_hash_lock);
+}
+
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
+
+ mutex_lock(&fpga_xfrm->lock);
+ if (!--fpga_xfrm->num_rules) {
+ mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
+ kfree(fpga_xfrm->sa_ctx);
+ fpga_xfrm->sa_ctx = NULL;
+ }
+ mutex_unlock(&fpga_xfrm->lock);
+}
+
+static inline struct mlx5_fpga_ipsec_rule *
+_rule_search(struct rb_root *root, struct fs_fte *fte)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct mlx5_fpga_ipsec_rule *rule =
+ container_of(node, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ if (rule->fte < fte)
+ node = node->rb_left;
+ else if (rule->fte > fte)
+ node = node->rb_right;
+ else
+ return rule;
+ }
+ return NULL;
+}
+
+static struct mlx5_fpga_ipsec_rule *
+rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
+{
+ struct mlx5_fpga_ipsec_rule *rule;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rule = _rule_search(&ipsec_dev->rules_rb, fte);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return rule;
+}
+
+static inline int _rule_insert(struct rb_root *root,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_node **new = &root->rb_node, *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct mlx5_fpga_ipsec_rule *this =
+ container_of(*new, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ parent = *new;
+ if (rule->fte < this->fte)
+ new = &((*new)->rb_left);
+ else if (rule->fte > this->fte)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&rule->node, parent, new);
+ rb_insert_color(&rule->node, root);
+
+ return 0;
+}
+
+static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ int ret;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ ret = _rule_insert(&ipsec_dev->rules_rb, rule);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return ret;
+}
+
+static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_root *root = &ipsec_dev->rules_rb;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rb_erase(&rule->node, root);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+}
+
+static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ _rule_delete(ipsec_dev, rule);
+ kfree(rule);
+}
+
+struct mailbox_mod {
+ uintptr_t saved_esp_id;
+ u32 saved_action;
+ u32 saved_outer_esp_spi_value;
+};
+
+static void restore_spec_mailbox(struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ mbox_mod->saved_outer_esp_spi_value);
+ fte->action.action |= mbox_mod->saved_action;
+ fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
+}
+
+static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ mbox_mod->saved_esp_id = fte->action.esp_id;
+ mbox_mod->saved_action = fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ mbox_mod->saved_outer_esp_spi_value =
+ MLX5_GET(fte_match_set_misc, misc_params_v,
+ outer_esp_spi);
+
+ fte->action.esp_id = 0;
+ fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ if (!MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
+}
+
+static enum fs_flow_table_type egress_to_fs_ft(bool egress)
+{
+ return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
+}
+
+static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id,
+ bool is_egress)
+{
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 *in,
+ unsigned int *group_id) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
+ char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
+ match_criteria.misc_parameters);
+ u32 saved_outer_esp_spi_mask;
+ u8 match_criteria_enable;
+ int ret;
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ return create_flow_group(dev, ft, in, group_id);
+
+ match_criteria_enable =
+ MLX5_GET(create_flow_group_in, in, match_criteria_enable);
+ saved_outer_esp_spi_mask =
+ MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+ if (!match_criteria_enable || !saved_outer_esp_spi_mask)
+ return create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
+
+ if (!(*misc_params_c) &&
+ !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
+
+ ret = create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return create_fte(dev, ft, fg, fte);
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return -ENOMEM;
+
+ rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
+ if (IS_ERR(rule->ctx)) {
+ int err = PTR_ERR(rule->ctx);
+ kfree(rule);
+ return err;
+ }
+
+ rule->fte = fte;
+ WARN_ON(rule_insert(fipsec, rule));
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = create_fte(dev, ft, fg, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+ if (ret) {
+ _rule_delete(fipsec, rule);
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ kfree(rule);
+ }
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return update_fte(dev, ft, group_id, modify_mask, fte);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = update_fte(dev, ft, group_id, modify_mask, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return delete_fte(dev, ft, fte);
+
+ rule = rule_search(fipsec, fte);
+ if (!rule)
+ return -ENOENT;
+
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ rule_delete(fipsec, rule);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = delete_fte(dev, ft, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, false);
+}
+
+static struct mlx5_flow_cmds fpga_ipsec_ingress;
+static struct mlx5_flow_cmds fpga_ipsec_egress;
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ return &fpga_ipsec_ingress;
+ case FS_FT_NIC_TX:
+ return &fpga_ipsec_egress;
+ default:
+ WARN_ON(true);
+ return NULL;
+ }
+}
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_conn_attr init_attr = {0};
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_conn *conn;
+ int err;
+
+ if (!mlx5_fpga_is_ipsec_device(mdev))
+ return 0;
+
+ fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
+ if (!fdev->ipsec)
+ return -ENOMEM;
+
+ fdev->ipsec->fdev = fdev;
+
+ err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
+ fdev->ipsec->caps);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
+ err);
+ goto error;
+ }
+
+ INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
+ spin_lock_init(&fdev->ipsec->pending_cmds_lock);
+
+ init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.recv_cb = mlx5_fpga_ipsec_recv;
+ init_attr.cb_arg = fdev;
+ conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+ if (IS_ERR(conn)) {
+ err = PTR_ERR(conn);
+ mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
+ err);
+ goto error;
+ }
+ fdev->ipsec->conn = conn;
+
+ err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
+ if (err)
+ goto err_destroy_conn;
+ mutex_init(&fdev->ipsec->sa_hash_lock);
+
+ fdev->ipsec->rules_rb = RB_ROOT;
+ mutex_init(&fdev->ipsec->rules_rb_lock);
+
+ err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
+ err);
+ goto err_destroy_hash;
+ }
+
+ return 0;
+
+err_destroy_hash:
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
+err_destroy_conn:
+ mlx5_fpga_sbu_conn_destroy(conn);
+
+error:
+ kfree(fdev->ipsec);
+ fdev->ipsec = NULL;
+ return err;
+}
+
+static void destroy_rules_rb(struct rb_root *root)
+{
+ struct mlx5_fpga_ipsec_rule *r, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
+ rb_erase(&r->node, root);
+ mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
+ kfree(r);
+ }
+}
+
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ if (!mlx5_fpga_is_ipsec_device(mdev))
+ return;
+
+ destroy_rules_rb(&fdev->ipsec->rules_rb);
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
+ mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
+ kfree(fdev->ipsec);
+ fdev->ipsec = NULL;
+}
+
+void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+ /* ingress */
+ fpga_ipsec_ingress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
+ fpga_ipsec_ingress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
+ fpga_ipsec_ingress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
+ fpga_ipsec_ingress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_ingress;
+ fpga_ipsec_ingress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
+ fpga_ipsec_ingress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_ingress;
+ fpga_ipsec_ingress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_ingress;
+ fpga_ipsec_ingress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_ingress;
+ fpga_ipsec_ingress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
+
+ /* egress */
+ fpga_ipsec_egress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
+ fpga_ipsec_egress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
+ fpga_ipsec_egress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
+ fpga_ipsec_egress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_egress;
+ fpga_ipsec_egress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
+ fpga_ipsec_egress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_egress;
+ fpga_ipsec_egress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_egress;
+ fpga_ipsec_egress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_egress;
+ fpga_ipsec_egress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
+}
+
+static int
+mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ if (attrs->tfc_pad) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
+ mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.iv_algo !=
+ MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
+ mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.icv_len != 128) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.key_len != 128 &&
+ attrs->keymat.aes_gcm.key_len != 256) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
+ (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
+ v2_command))) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+
+ if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
+ mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
+ if (!fpga_xfrm)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&fpga_xfrm->lock);
+ memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
+ sizeof(fpga_xfrm->accel_xfrm.attrs));
+
+ return &fpga_xfrm->accel_xfrm;
+}
+
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(xfrm, struct mlx5_fpga_esp_xfrm,
+ accel_xfrm);
+ /* assuming no sa_ctx are connected to this xfrm_ctx */
+ kfree(fpga_xfrm);
+}
+
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = xfrm->mdev;
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+ struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
+
+ int err = 0;
+
+ if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
+ return 0;
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (is_v2_sadb_supported(fipsec)) {
+ mlx5_core_warn(mdev, "Modify esp is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (!fpga_xfrm->sa_ctx)
+ /* Unbounded xfrm, chane only sw attrs */
+ goto change_sw_xfrm_attrs;
+
+ /* copy original hw sa */
+ memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
+ mutex_lock(&fipsec->sa_hash_lock);
+ /* remove original hw sa from hash */
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa));
+ /* update hw_sa with new xfrm attrs*/
+ mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
+ &fpga_xfrm->sa_ctx->hw_sa);
+ /* try to insert new hw_sa to hash */
+ err = rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa);
+ if (err)
+ goto rollback_sa;
+
+ /* modify device with new hw_sa */
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
+ MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
+ fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err)
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+rollback_sa:
+ if (err) {
+ /* return original hw_sa to hash */
+ memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
+ sizeof(org_hw_sa));
+ WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+ }
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+change_sw_xfrm_attrs:
+ if (!err)
+ memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
+ mutex_unlock(&fpga_xfrm->lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
new file mode 100644
index 000000000..2b5e63b0d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_IPSEC_H__
+#define __MLX5_FPGA_IPSEC_H__
+
+#include "accel/ipsec.h"
+#include "fs_cmd.h"
+
+#ifdef CONFIG_MLX5_FPGA
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int counters_count);
+
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_build_fs_cmds(void);
+
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags);
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
+
+#else
+
+static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline unsigned int
+mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
+ u64 *counters)
+{
+ return 0;
+}
+
+static inline void *
+mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+}
+
+static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+}
+
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+}
+
+static inline int
+mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ return mlx5_fs_cmd_get_default(type);
+}
+
+#endif /* CONFIG_MLX5_FPGA */
+
+#endif /* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
new file mode 100644
index 000000000..14962969c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "fpga/core.h"
+#include "fpga/conn.h"
+#include "fpga/sdk.h"
+
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr)
+{
+ return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create);
+
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ mlx5_fpga_conn_destroy(conn);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy);
+
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ return mlx5_fpga_conn_send(conn, buf);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg);
+
+static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
+ u64 addr, u8 *buf)
+{
+ size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+ size_t bytes_done = 0;
+ u8 actual_size;
+ int err;
+
+ if (!size)
+ return -EINVAL;
+
+ if (!fdev->mdev)
+ return -ENOTCONN;
+
+ while (bytes_done < size) {
+ actual_size = min(max_size, (size - bytes_done));
+
+ err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+ addr + bytes_done,
+ buf + bytes_done, false);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n",
+ err);
+ break;
+ }
+
+ bytes_done += actual_size;
+ }
+
+ return err;
+}
+
+static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
+ u64 addr, u8 *buf)
+{
+ size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+ size_t bytes_done = 0;
+ u8 actual_size;
+ int err;
+
+ if (!size)
+ return -EINVAL;
+
+ if (!fdev->mdev)
+ return -ENOTCONN;
+
+ while (bytes_done < size) {
+ actual_size = min(max_size, (size - bytes_done));
+
+ err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+ addr + bytes_done,
+ buf + bytes_done, true);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n");
+ break;
+ }
+
+ bytes_done += actual_size;
+ }
+
+ return err;
+}
+
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_read);
+
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_write);
+
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf)
+{
+ return mlx5_fpga_sbu_caps(fdev->mdev, buf, size);
+}
+EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
new file mode 100644
index 000000000..656f96be6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MLX5_FPGA_SDK_H
+#define MLX5_FPGA_SDK_H
+
+#include <linux/types.h>
+#include <linux/dma-direction.h>
+
+/**
+ * DOC: Innova SDK
+ * This header defines the in-kernel API for Innova FPGA client drivers.
+ */
+#define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000)
+
+/**
+ * enum mlx5_fpga_access_type - Enumerated the different methods possible for
+ * accessing the device memory address space
+ */
+enum mlx5_fpga_access_type {
+ /** Use the slow CX-FPGA I2C bus */
+ MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
+ /** Use the fastest available method */
+ MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
+};
+
+struct mlx5_fpga_conn;
+struct mlx5_fpga_device;
+
+/**
+ * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry
+ */
+struct mlx5_fpga_dma_entry {
+ /** @data: Virtual address pointer to the data */
+ void *data;
+ /** @size: Size in bytes of the data */
+ unsigned int size;
+ /** @dma_addr: Private member. Physical DMA-mapped address of the data */
+ dma_addr_t dma_addr;
+};
+
+/**
+ * struct mlx5_fpga_dma_buf - A packet buffer
+ * May contain up to 2 scatter-gather data entries
+ */
+struct mlx5_fpga_dma_buf {
+ /** @dma_dir: DMA direction */
+ enum dma_data_direction dma_dir;
+ /** @sg: Scatter-gather entries pointing to the data in memory */
+ struct mlx5_fpga_dma_entry sg[2];
+ /** @list: Item in SQ backlog, for TX packets */
+ struct list_head list;
+ /**
+ * @complete: Completion routine, for TX packets
+ * @conn: FPGA Connection this packet was sent to
+ * @fdev: FPGA device this packet was sent to
+ * @buf: The packet buffer
+ * @status: 0 if successful, or an error code otherwise
+ */
+ void (*complete)(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf, u8 status);
+};
+
+/**
+ * struct mlx5_fpga_conn_attr - FPGA connection attributes
+ * Describes the attributes of a connection
+ */
+struct mlx5_fpga_conn_attr {
+ /** @tx_size: Size of connection TX queue, in packets */
+ unsigned int tx_size;
+ /** @rx_size: Size of connection RX queue, in packets */
+ unsigned int rx_size;
+ /**
+ * @recv_cb: Callback function which is called for received packets
+ * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg
+ * @buf: A buffer containing a received packet
+ *
+ * buf is guaranteed to only contain a single scatter-gather entry.
+ * The size of the actual packet received is specified in buf.sg[0].size
+ * When this callback returns, the packet buffer may be re-used for
+ * subsequent receives.
+ */
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ void *cb_arg;
+};
+
+/**
+ * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection
+ * @fdev: The FPGA device
+ * @attr: Attributes of the new connection
+ *
+ * Sets up a new FPGA SBU connection with the specified attributes.
+ * The receive callback function may be called for incoming messages even
+ * before this function returns.
+ *
+ * The caller must eventually destroy the connection by calling
+ * mlx5_fpga_sbu_conn_destroy.
+ *
+ * Return: A new connection, or ERR_PTR() error value otherwise.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr);
+
+/**
+ * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection
+ * @conn: The FPGA SBU connection to destroy
+ *
+ * Cleans up an FPGA SBU connection which was previously created with
+ * mlx5_fpga_sbu_conn_create.
+ */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
+
+/**
+ * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
+ * @fdev: An FPGA SBU connection
+ * @buf: The packet buffer
+ *
+ * Queues a packet for transmission over an FPGA SBU connection.
+ * The buffer should not be modified or freed until completion.
+ * Upon completion, the buf's complete() callback is invoked, indicating the
+ * success or error status of the transmission.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+/**
+ * mlx5_fpga_mem_read() - Read from FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to read, in bytes
+ * @addr: Starting address to read from, in FPGA address space
+ * @buf: Buffer to read into
+ * @access_type: Method for reading
+ *
+ * Reads from the specified address into the specified buffer.
+ * The address may point to configuration space or to DDR.
+ * Large reads may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_mem_write() - Write to FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to write, in bytes
+ * @addr: Starting address to write to, in FPGA address space
+ * @buf: Buffer which contains data to write
+ * @access_type: Method for writing
+ *
+ * Writes the specified buffer data to FPGA memory at the specified address.
+ * The address may point to configuration space or to DDR.
+ * Large writes may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities
+ * @fdev: The FPGA device
+ * @size: Size of the buffer to read into
+ * @buf: Buffer to read the capabilities into
+ *
+ * Reads the FPGA SBU capabilities into the specified buffer.
+ * The format of the capabilities buffer is SBU-dependent.
+ *
+ * Return: 0 if successful
+ * -EINVAL if the buffer is not large enough to contain SBU caps
+ * or any other error value otherwise.
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf);
+
+#endif /* MLX5_FPGA_SDK_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
new file mode 100644
index 000000000..22a2ef111
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+#include "fpga/tls.h"
+#include "fpga/cmd.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+#include "accel/tls.h"
+
+struct mlx5_fpga_tls_command_context;
+
+typedef void (*mlx5_fpga_tls_command_complete)
+ (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *ctx,
+ struct mlx5_fpga_dma_buf *resp);
+
+struct mlx5_fpga_tls_command_context {
+ struct list_head list;
+ /* There is no guarantee on the order between the TX completion
+ * and the command response.
+ * The TX completion is going to touch cmd->buf even in
+ * the case of successful transmission.
+ * So instead of requiring separate allocations for cmd
+ * and cmd->buf we've decided to use a reference counter
+ */
+ refcount_t ref;
+ struct mlx5_fpga_dma_buf buf;
+ mlx5_fpga_tls_command_complete complete;
+};
+
+static void
+mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx)
+{
+ if (refcount_dec_and_test(&ctx->ref))
+ kfree(ctx);
+}
+
+static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ struct mlx5_fpga_conn *conn = fdev->tls->conn;
+ struct mlx5_fpga_tls_command_context *ctx;
+ struct mlx5_fpga_tls *tls = fdev->tls;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+ ctx = list_first_entry(&tls->pending_cmds,
+ struct mlx5_fpga_tls_command_context, list);
+ list_del(&ctx->list);
+ spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+ ctx->complete(conn, fdev, ctx, resp);
+}
+
+static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf,
+ u8 status)
+{
+ struct mlx5_fpga_tls_command_context *ctx =
+ container_of(buf, struct mlx5_fpga_tls_command_context, buf);
+
+ mlx5_fpga_tls_put_command_ctx(ctx);
+
+ if (unlikely(status))
+ mlx5_fpga_tls_cmd_complete(fdev, NULL);
+}
+
+static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ mlx5_fpga_tls_command_complete complete)
+{
+ struct mlx5_fpga_tls *tls = fdev->tls;
+ unsigned long flags;
+ int ret;
+
+ refcount_set(&cmd->ref, 2);
+ cmd->complete = complete;
+ cmd->buf.complete = mlx5_fpga_cmd_send_complete;
+
+ spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+ /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock
+ * to make sure commands are inserted to the tls->pending_cmds list
+ * and the command QP in the same order.
+ */
+ ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf);
+ if (likely(!ret))
+ list_add_tail(&cmd->list, &tls->pending_cmds);
+ else
+ complete(tls->conn, fdev, cmd, NULL);
+ spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+}
+
+/* Start of context identifiers range (inclusive) */
+#define SWID_START 0
+/* End of context identifiers range (exclusive) */
+#define SWID_END BIT(24)
+
+static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
+ void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ /* TLS metadata format is 1 byte for syndrome followed
+ * by 3 bytes of swid (software ID)
+ * swid must not exceed 3 bytes.
+ * See tls_rxtx.c:insert_pet() for details
+ */
+ BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_irqsave(idr_spinlock, flags);
+ ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
+ spin_unlock_irqrestore(idr_spinlock, flags);
+ idr_preload_end();
+
+ return ret;
+}
+
+static void *mlx5_fpga_tls_release_swid(struct idr *idr,
+ spinlock_t *idr_spinlock, u32 swid)
+{
+ unsigned long flags;
+ void *ptr;
+
+ spin_lock_irqsave(idr_spinlock, flags);
+ ptr = idr_remove(idr, swid);
+ spin_unlock_irqrestore(idr_spinlock, flags);
+ return ptr;
+}
+
+static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf, u8 status)
+{
+ kfree(buf);
+}
+
+static void
+mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ if (resp) {
+ u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+
+ if (syndrome)
+ mlx5_fpga_err(fdev,
+ "Teardown stream failed with syndrome = %d",
+ syndrome);
+ }
+ mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
+{
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow,
+ MLX5_BYTE_OFF(tls_flow, ipv6));
+
+ MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6));
+ MLX5_SET(tls_cmd, cmd, direction_sx,
+ MLX5_GET(tls_flow, flow, direction_sx));
+}
+
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
+ void *flow;
+ void *cmd;
+ int ret;
+
+ buf = kzalloc(size, GFP_ATOMIC);
+ if (!buf)
+ return -ENOMEM;
+
+ cmd = (buf + 1);
+
+ rcu_read_lock();
+ flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+ if (unlikely(!flow)) {
+ rcu_read_unlock();
+ WARN_ONCE(1, "Received NULL pointer for handle\n");
+ kfree(buf);
+ return -EINVAL;
+ }
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ rcu_read_unlock();
+
+ MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
+ MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
+ MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
+
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+ buf->complete = mlx_tls_kfree_complete;
+
+ ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+ if (ret < 0)
+ kfree(buf);
+
+ return ret;
+}
+
+static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
+ void *flow, u32 swid, gfp_t flags)
+{
+ struct mlx5_fpga_tls_command_context *ctx;
+ struct mlx5_fpga_dma_buf *buf;
+ void *cmd;
+
+ ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags);
+ if (!ctx)
+ return;
+
+ buf = &ctx->buf;
+ cmd = (ctx + 1);
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
+ MLX5_SET(tls_cmd, cmd, swid, swid);
+
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ kfree(flow);
+
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+
+ mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
+ mlx5_fpga_tls_teardown_completion);
+}
+
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ gfp_t flags, bool direction_sx)
+{
+ struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+ void *flow;
+
+ if (direction_sx)
+ flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock,
+ swid);
+ else
+ flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock,
+ swid);
+
+ if (!flow) {
+ mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
+ swid);
+ return;
+ }
+
+ synchronize_rcu(); /* before kfree(flow) */
+ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
+}
+
+enum mlx5_fpga_setup_stream_status {
+ MLX5_FPGA_CMD_PENDING,
+ MLX5_FPGA_CMD_SEND_FAILED,
+ MLX5_FPGA_CMD_RESPONSE_RECEIVED,
+ MLX5_FPGA_CMD_ABANDONED,
+};
+
+struct mlx5_setup_stream_context {
+ struct mlx5_fpga_tls_command_context cmd;
+ atomic_t status;
+ u32 syndrome;
+ struct completion comp;
+};
+
+static void
+mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ struct mlx5_setup_stream_context *ctx =
+ container_of(cmd, struct mlx5_setup_stream_context, cmd);
+ int status = MLX5_FPGA_CMD_SEND_FAILED;
+ void *tls_cmd = ctx + 1;
+
+ /* If we failed to send to command resp == NULL */
+ if (resp) {
+ ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+ status = MLX5_FPGA_CMD_RESPONSE_RECEIVED;
+ }
+
+ status = atomic_xchg_release(&ctx->status, status);
+ if (likely(status != MLX5_FPGA_CMD_ABANDONED)) {
+ complete(&ctx->comp);
+ return;
+ }
+
+ mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n",
+ ctx->syndrome);
+
+ if (!ctx->syndrome) {
+ /* The process was killed while waiting for the context to be
+ * added, and the add completed successfully.
+ * We need to destroy the HW context, and we can't can't reuse
+ * the command context because we might not have received
+ * the tx completion yet.
+ */
+ mlx5_fpga_tls_del_flow(fdev->mdev,
+ MLX5_GET(tls_cmd, tls_cmd, swid),
+ GFP_ATOMIC,
+ MLX5_GET(tls_cmd, tls_cmd,
+ direction_sx));
+ }
+
+ mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev,
+ struct mlx5_setup_stream_context *ctx)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ void *cmd = ctx + 1;
+ int status, ret = 0;
+
+ buf = &ctx->cmd.buf;
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM);
+
+ init_completion(&ctx->comp);
+ atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING);
+ ctx->syndrome = -1;
+
+ mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+ mlx5_fpga_tls_setup_completion);
+ wait_for_completion_killable(&ctx->comp);
+
+ status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED);
+ if (unlikely(status == MLX5_FPGA_CMD_PENDING))
+ /* ctx is going to be released in mlx5_fpga_tls_setup_completion */
+ return -EINTR;
+
+ if (unlikely(ctx->syndrome))
+ ret = -ENOMEM;
+
+ mlx5_fpga_tls_put_command_ctx(&ctx->cmd);
+ return ret;
+}
+
+static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg;
+
+ mlx5_fpga_tls_cmd_complete(fdev, buf);
+}
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+ MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0)
+ return false;
+
+ return true;
+}
+
+static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev,
+ u32 *p_caps)
+{
+ int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap);
+ u32 caps = 0;
+ void *buf;
+
+ buf = kzalloc(cap_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf);
+ if (err)
+ goto out;
+
+ if (MLX5_GET(tls_extended_cap, buf, tx))
+ caps |= MLX5_ACCEL_TLS_TX;
+ if (MLX5_GET(tls_extended_cap, buf, rx))
+ caps |= MLX5_ACCEL_TLS_RX;
+ if (MLX5_GET(tls_extended_cap, buf, tls_v12))
+ caps |= MLX5_ACCEL_TLS_V12;
+ if (MLX5_GET(tls_extended_cap, buf, tls_v13))
+ caps |= MLX5_ACCEL_TLS_V13;
+ if (MLX5_GET(tls_extended_cap, buf, lro))
+ caps |= MLX5_ACCEL_TLS_LRO;
+ if (MLX5_GET(tls_extended_cap, buf, ipv6))
+ caps |= MLX5_ACCEL_TLS_IPV6;
+
+ if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128))
+ caps |= MLX5_ACCEL_TLS_AES_GCM128;
+ if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256))
+ caps |= MLX5_ACCEL_TLS_AES_GCM256;
+
+ *p_caps = caps;
+ err = 0;
+out:
+ kfree(buf);
+ return err;
+}
+
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_conn_attr init_attr = {0};
+ struct mlx5_fpga_conn *conn;
+ struct mlx5_fpga_tls *tls;
+ int err = 0;
+
+ if (!mlx5_fpga_is_tls_device(mdev) || !fdev)
+ return 0;
+
+ tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+ if (!tls)
+ return -ENOMEM;
+
+ err = mlx5_fpga_tls_get_caps(fdev, &tls->caps);
+ if (err)
+ goto error;
+
+ if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
+ err = -ENOTSUPP;
+ goto error;
+ }
+
+ init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb;
+ init_attr.cb_arg = fdev;
+ conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+ if (IS_ERR(conn)) {
+ err = PTR_ERR(conn);
+ mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n",
+ err);
+ goto error;
+ }
+
+ tls->conn = conn;
+ spin_lock_init(&tls->pending_cmds_lock);
+ INIT_LIST_HEAD(&tls->pending_cmds);
+
+ idr_init(&tls->tx_idr);
+ idr_init(&tls->rx_idr);
+ spin_lock_init(&tls->tx_idr_spinlock);
+ spin_lock_init(&tls->rx_idr_spinlock);
+ fdev->tls = tls;
+ return 0;
+
+error:
+ kfree(tls);
+ return err;
+}
+
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ if (!fdev || !fdev->tls)
+ return;
+
+ mlx5_fpga_sbu_conn_destroy(fdev->tls->conn);
+ kfree(fdev->tls);
+ fdev->tls = NULL;
+}
+
+static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd,
+ struct tls_crypto_info *info,
+ __be64 *rcd_sn)
+{
+ struct tls12_crypto_info_aes_gcm_128 *crypto_info =
+ (struct tls12_crypto_info_aes_gcm_128 *)info;
+
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv),
+ crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key),
+ crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+ /* in AES-GCM 128 we need to write the key twice */
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) +
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE,
+ crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+ MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128);
+}
+
+static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
+ struct tls_crypto_info *crypto_info)
+{
+ __be64 rcd_sn;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (!(caps & MLX5_ACCEL_TLS_AES_GCM128))
+ return -EINVAL;
+ mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 swid, u32 tcp_sn)
+{
+ u32 caps = mlx5_fpga_tls_device_caps(mdev);
+ struct mlx5_setup_stream_context *ctx;
+ int ret = -ENOMEM;
+ size_t cmd_size;
+ void *cmd;
+
+ cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx);
+ ctx = kzalloc(cmd_size, GFP_KERNEL);
+ if (!ctx)
+ goto out;
+
+ cmd = ctx + 1;
+ ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info);
+ if (ret)
+ goto free_ctx;
+
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+
+ MLX5_SET(tls_cmd, cmd, swid, swid);
+ MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn);
+
+ return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx);
+
+free_ctx:
+ kfree(ctx);
+out:
+ return ret;
+}
+
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx)
+{
+ struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+ int ret = -ENOMEM;
+ u32 swid;
+
+ if (direction_sx)
+ ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock, flow);
+ else
+ ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock, flow);
+
+ if (ret < 0)
+ return ret;
+
+ swid = ret;
+ MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
+
+ ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
+ start_offload_tcp_sn);
+ if (ret && ret != -EINTR)
+ goto free_swid;
+
+ *p_swid = swid;
+ return 0;
+free_swid:
+ if (direction_sx)
+ mlx5_fpga_tls_release_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock, swid);
+ else
+ mlx5_fpga_tls_release_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock, swid);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
new file mode 100644
index 000000000..3b2e37bf7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_TLS_H__
+#define __MLX5_FPGA_TLS_H__
+
+#include <linux/mlx5/driver.h>
+
+#include <net/tls.h>
+#include "fpga/core.h"
+
+struct mlx5_fpga_tls {
+ struct list_head pending_cmds;
+ spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+ u32 caps;
+ struct mlx5_fpga_conn *conn;
+
+ struct idr tx_idr;
+ struct idr rx_idr;
+ spinlock_t tx_idr_spinlock; /* protects the IDR */
+ spinlock_t rx_idr_spinlock; /* protects the IDR */
+};
+
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx);
+
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ gfp_t flags, bool direction_sx);
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev);
+
+static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mdev->fpga->tls->caps;
+}
+
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn);
+
+#endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
new file mode 100644
index 000000000..8e01f8180
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/mlx5_ifc.h>
+
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 underlay_qpn,
+ bool disconnect)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
+
+ if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
+ underlay_qpn == 0)
+ return 0;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
+
+ if (disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 1);
+ MLX5_SET(set_flow_table_root_in, in, table_id, 0);
+ } else {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+ }
+
+ MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
+ if (ft->vport) {
+ MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
+ MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
+{
+ int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
+ u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
+ int err;
+
+ MLX5_SET(create_flow_table_in, in, opcode,
+ MLX5_CMD_OP_CREATE_FLOW_TABLE);
+
+ MLX5_SET(create_flow_table_in, in, table_type, type);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.level, level);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size);
+ if (vport) {
+ MLX5_SET(create_flow_table_in, in, vport_number, vport);
+ MLX5_SET(create_flow_table_in, in, other_vport, 1);
+ }
+
+ MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
+ en_encap_decap);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
+ en_encap_decap);
+
+ switch (op_mod) {
+ case FS_FT_OP_MOD_NORMAL:
+ if (next_ft) {
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.table_miss_action, 1);
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.table_miss_id, next_ft->id);
+ }
+ break;
+
+ case FS_FT_OP_MOD_LAG_DEMUX:
+ MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
+ if (next_ft)
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id,
+ next_ft->id);
+ break;
+ }
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *table_id = MLX5_GET(create_flow_table_out, out,
+ table_id);
+ return err;
+}
+
+static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
+
+ MLX5_SET(destroy_flow_table_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ MLX5_SET(destroy_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(destroy_flow_table_in, in, table_id, ft->id);
+ if (ft->vport) {
+ MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
+
+ MLX5_SET(modify_flow_table_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_FLOW_TABLE);
+ MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
+
+ if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) {
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id, next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id, 0);
+ }
+ } else {
+ if (ft->vport) {
+ MLX5_SET(modify_flow_table_in, in, vport_number,
+ ft->vport);
+ MLX5_SET(modify_flow_table_in, in, other_vport, 1);
+ }
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_action, 1);
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_id,
+ next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_action, 0);
+ }
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ MLX5_SET(create_flow_group_in, in, opcode,
+ MLX5_CMD_OP_CREATE_FLOW_GROUP);
+ MLX5_SET(create_flow_group_in, in, table_type, ft->type);
+ MLX5_SET(create_flow_group_in, in, table_id, ft->id);
+ if (ft->vport) {
+ MLX5_SET(create_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(create_flow_group_in, in, other_vport, 1);
+ }
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *group_id = MLX5_GET(create_flow_group_out, out,
+ group_id);
+ return err;
+}
+
+static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0};
+
+ MLX5_SET(destroy_flow_group_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ MLX5_SET(destroy_flow_group_in, in, table_type, ft->type);
+ MLX5_SET(destroy_flow_group_in, in, table_id, ft->id);
+ MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+ if (ft->vport) {
+ MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
+ int opmod, int modify_mask,
+ struct mlx5_flow_table *ft,
+ unsigned group_id,
+ struct fs_fte *fte)
+{
+ unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
+ fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+ struct mlx5_flow_rule *dst;
+ void *in_flow_context, *vlan;
+ void *in_match_value;
+ void *in_dests;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+ MLX5_SET(set_fte_in, in, op_mod, opmod);
+ MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask);
+ MLX5_SET(set_fte_in, in, table_type, ft->type);
+ MLX5_SET(set_fte_in, in, table_id, ft->id);
+ MLX5_SET(set_fte_in, in, flow_index, fte->index);
+ if (ft->vport) {
+ MLX5_SET(set_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(set_fte_in, in, other_vport, 1);
+ }
+
+ in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+ MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+
+ MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
+ MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_id);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
+
+ in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
+ match_value);
+ memcpy(in_match_value, &fte->val, sizeof(fte->val));
+
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ int list_size = 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ unsigned int id, type = dst->dest_attr.type;
+
+ if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ id = dst->dest_attr.ft_num;
+ type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ id = dst->dest_attr.ft->id;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ id = dst->dest_attr.vport.num;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid,
+ dst->dest_attr.vport.vhca_id_valid);
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id,
+ dst->dest_attr.vport.vhca_id);
+ break;
+ default:
+ id = dst->dest_attr.tir_num;
+ }
+
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ type);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+ in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ list_size++;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, destination_list_size,
+ list_size);
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
+ log_max_flow_counter,
+ ft->type));
+ int list_size = 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (dst->dest_attr.type !=
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
+ dst->dest_attr.counter->id);
+ in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ list_size++;
+ }
+ if (list_size > max_list_size) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, flow_counter_list_size,
+ list_size);
+ }
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+err_out:
+ kvfree(in);
+ return err;
+}
+
+static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ unsigned int group_id = group->id;
+
+ return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
+}
+
+static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ int opmod;
+ int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.
+ flow_modify_en);
+ if (!atomic_mod_cap)
+ return -EOPNOTSUPP;
+ opmod = 1;
+
+ return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
+}
+
+static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0};
+
+ MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ MLX5_SET(delete_fte_in, in, table_type, ft->type);
+ MLX5_SET(delete_fte_in, in, table_id, ft->id);
+ MLX5_SET(delete_fte_in, in, flow_index, fte->index);
+ if (ft->vport) {
+ MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(delete_fte_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ return err;
+}
+
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
+
+ MLX5_SET(dealloc_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+ u64 *packets, u64 *bytes)
+{
+ u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+ MLX5_ST_SZ_BYTES(traffic_counter)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+ void *stats;
+ int err = 0;
+
+ MLX5_SET(query_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+ MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+ MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
+ *packets = MLX5_GET64(traffic_counter, stats, packets);
+ *bytes = MLX5_GET64(traffic_counter, stats, octets);
+ return 0;
+}
+
+struct mlx5_cmd_fc_bulk {
+ u32 id;
+ int num;
+ int outlen;
+ u32 out[0];
+};
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num)
+{
+ struct mlx5_cmd_fc_bulk *b;
+ int outlen =
+ MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+ MLX5_ST_SZ_BYTES(traffic_counter) * num;
+
+ b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
+ if (!b)
+ return NULL;
+
+ b->id = id;
+ b->num = num;
+ b->outlen = outlen;
+
+ return b;
+}
+
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
+{
+ kfree(b);
+}
+
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
+{
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+
+ MLX5_SET(query_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+ MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+ MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
+ MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
+ return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
+}
+
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_fc_bulk *b, u32 id,
+ u64 *packets, u64 *bytes)
+{
+ int index = id - b->id;
+ void *stats;
+
+ if (index < 0 || index >= b->num) {
+ mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
+ id, b->id, b->id + b->num - 1);
+ return;
+ }
+
+ stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
+ flow_statistics[index]);
+ *packets = MLX5_GET64(traffic_counter, stats, packets);
+ *bytes = MLX5_GET64(traffic_counter, stats, octets);
+}
+
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+ int header_type,
+ size_t size,
+ void *encap_header,
+ u32 *encap_id)
+{
+ int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
+ u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+ void *encap_header_in;
+ void *header;
+ int inlen;
+ int err;
+ u32 *in;
+
+ if (size > max_encap_size) {
+ mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
+ size, max_encap_size);
+ return -EINVAL;
+ }
+
+ in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size,
+ GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
+ header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
+ inlen = header - (void *)in + size;
+
+ memset(in, 0, inlen);
+ MLX5_SET(alloc_encap_header_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+ MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+ MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+ memcpy(header, encap_header, size);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+ *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+ kfree(in);
+ return err;
+}
+
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+ u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+
+ memset(in, 0, sizeof(in));
+ MLX5_SET(dealloc_encap_header_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+ MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 namespace, u8 num_actions,
+ void *modify_actions, u32 *modify_header_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
+ int max_actions, actions_size, inlen, err;
+ void *actions_in;
+ u8 table_type;
+ u32 *in;
+
+ switch (namespace) {
+ case MLX5_FLOW_NAMESPACE_FDB:
+ max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions);
+ table_type = FS_FT_FDB;
+ break;
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
+ table_type = FS_FT_NIC_RX;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (num_actions > max_actions) {
+ mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n",
+ num_actions, max_actions);
+ return -EOPNOTSUPP;
+ }
+
+ actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions;
+ inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(alloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type);
+ MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions);
+
+ actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
+ memcpy(actions_in, modify_actions, actions_size);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+ *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+ kfree(in);
+ return err;
+}
+
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
+ u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+
+ memset(in, 0, sizeof(in));
+ MLX5_SET(dealloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
+ modify_header_id);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds = {
+ .create_flow_table = mlx5_cmd_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_modify_flow_table,
+ .create_flow_group = mlx5_cmd_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_destroy_flow_group,
+ .create_fte = mlx5_cmd_create_fte,
+ .update_fte = mlx5_cmd_update_fte,
+ .delete_fte = mlx5_cmd_delete_fte,
+ .update_root_ft = mlx5_cmd_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
+ .create_flow_table = mlx5_cmd_stub_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_stub_modify_flow_table,
+ .create_flow_group = mlx5_cmd_stub_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group,
+ .create_fte = mlx5_cmd_stub_create_fte,
+ .update_fte = mlx5_cmd_stub_update_fte,
+ .delete_fte = mlx5_cmd_stub_delete_fte,
+ .update_root_ft = mlx5_cmd_stub_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+{
+ return &mlx5_flow_cmds;
+}
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void)
+{
+ return &mlx5_flow_cmd_stubs;
+}
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ case FS_FT_ESW_EGRESS_ACL:
+ case FS_FT_ESW_INGRESS_ACL:
+ case FS_FT_FDB:
+ case FS_FT_SNIFFER_RX:
+ case FS_FT_SNIFFER_TX:
+ return mlx5_fs_cmd_get_fw_cmds();
+ case FS_FT_NIC_TX:
+ default:
+ return mlx5_fs_cmd_get_stub_cmds();
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
new file mode 100644
index 000000000..6228ba7bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CMD_
+#define _MLX5_FS_CMD_
+
+#include "fs_core.h"
+
+struct mlx5_flow_cmds {
+ int (*create_flow_table)(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level, unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags);
+ int (*destroy_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft);
+
+ int (*modify_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft);
+
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id);
+
+ int (*destroy_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id);
+
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte);
+
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte);
+
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte);
+
+ int (*update_root_ft)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect);
+};
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+ u64 *packets, u64 *bytes);
+
+struct mlx5_cmd_fc_bulk;
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num);
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_fc_bulk *b, u32 id,
+ u64 *packets, u64 *bytes);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
new file mode 100644
index 000000000..f0aa7f0e5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -0,0 +1,2720 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "diag/fs_tracepoint.h"
+#include "accel/ipsec.h"
+#include "fpga/ipsec.h"
+
+#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
+ sizeof(struct init_tree_node))
+
+#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
+ ...) {.type = FS_TYPE_PRIO,\
+ .min_ft_level = min_level_val,\
+ .num_levels = num_levels_val,\
+ .num_leaf_prios = num_prios_val,\
+ .caps = caps_val,\
+ .children = (struct init_tree_node[]) {__VA_ARGS__},\
+ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
+ ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
+ __VA_ARGS__)\
+
+#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
+ .children = (struct init_tree_node[]) {__VA_ARGS__},\
+ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
+ sizeof(long))
+
+#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
+
+#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
+ .caps = (long[]) {__VA_ARGS__} }
+
+#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_receive.modify_root), \
+ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
+
+#define LEFTOVERS_NUM_LEVELS 1
+#define LEFTOVERS_NUM_PRIOS 1
+
+#define BY_PASS_PRIO_NUM_LEVELS 1
+#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
+ LEFTOVERS_NUM_PRIOS)
+
+#define ETHTOOL_PRIO_NUM_LEVELS 1
+#define ETHTOOL_NUM_PRIOS 11
+#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
+/* Vlan, mac, ttc, inner ttc, aRFS */
+#define KERNEL_NIC_PRIO_NUM_LEVELS 5
+#define KERNEL_NIC_NUM_PRIOS 1
+/* One more level for tc */
+#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
+
+#define KERNEL_NIC_TC_NUM_PRIOS 1
+#define KERNEL_NIC_TC_NUM_LEVELS 2
+
+#define ANCHOR_NUM_LEVELS 1
+#define ANCHOR_NUM_PRIOS 1
+#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
+
+#define OFFLOADS_MAX_FT 1
+#define OFFLOADS_NUM_PRIOS 1
+#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
+
+#define LAG_PRIO_NUM_LEVELS 1
+#define LAG_NUM_PRIOS 1
+#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+
+struct node_caps {
+ size_t arr_sz;
+ long *caps;
+};
+
+static struct init_tree_node {
+ enum fs_node_type type;
+ struct init_tree_node *children;
+ int ar_size;
+ struct node_caps caps;
+ int min_ft_level;
+ int num_leaf_prios;
+ int prio;
+ int num_levels;
+} root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 7,
+ .children = (struct init_tree_node[]) {
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, LAG_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
+ LAG_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
+ ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
+ ETHTOOL_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS),
+ ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
+ KERNEL_NIC_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))),
+ ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))),
+ }
+};
+
+enum fs_i_lock_class {
+ FS_LOCK_GRANDPARENT,
+ FS_LOCK_PARENT,
+ FS_LOCK_CHILD
+};
+
+static const struct rhashtable_params rhash_fte = {
+ .key_len = FIELD_SIZEOF(struct fs_fte, val),
+ .key_offset = offsetof(struct fs_fte, val),
+ .head_offset = offsetof(struct fs_fte, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+static const struct rhashtable_params rhash_fg = {
+ .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask),
+ .key_offset = offsetof(struct mlx5_flow_group, mask),
+ .head_offset = offsetof(struct mlx5_flow_group, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+
+};
+
+static void del_hw_flow_table(struct fs_node *node);
+static void del_hw_flow_group(struct fs_node *node);
+static void del_hw_fte(struct fs_node *node);
+static void del_sw_flow_table(struct fs_node *node);
+static void del_sw_flow_group(struct fs_node *node);
+static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
+/* Delete rule (destination) is special case that
+ * requires to lock the FTE for all the deletion process.
+ */
+static void del_sw_hw_rule(struct fs_node *node);
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+ struct mlx5_flow_destination *d2);
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
+static struct mlx5_flow_rule *
+find_flow_rule(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest);
+
+static void tree_init_node(struct fs_node *node,
+ void (*del_hw_func)(struct fs_node *),
+ void (*del_sw_func)(struct fs_node *))
+{
+ refcount_set(&node->refcount, 1);
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->children);
+ init_rwsem(&node->lock);
+ node->del_hw_func = del_hw_func;
+ node->del_sw_func = del_sw_func;
+ node->active = false;
+}
+
+static void tree_add_node(struct fs_node *node, struct fs_node *parent)
+{
+ if (parent)
+ refcount_inc(&parent->refcount);
+ node->parent = parent;
+
+ /* Parent is the root */
+ if (!parent)
+ node->root = node;
+ else
+ node->root = parent->root;
+}
+
+static int tree_get_node(struct fs_node *node)
+{
+ return refcount_inc_not_zero(&node->refcount);
+}
+
+static void nested_down_read_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
+{
+ if (node) {
+ down_read_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void nested_down_write_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
+{
+ if (node) {
+ down_write_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void down_write_ref_node(struct fs_node *node)
+{
+ if (node) {
+ down_write(&node->lock);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void up_read_ref_node(struct fs_node *node)
+{
+ refcount_dec(&node->refcount);
+ up_read(&node->lock);
+}
+
+static void up_write_ref_node(struct fs_node *node)
+{
+ refcount_dec(&node->refcount);
+ up_write(&node->lock);
+}
+
+static void tree_put_node(struct fs_node *node)
+{
+ struct fs_node *parent_node = node->parent;
+
+ if (refcount_dec_and_test(&node->refcount)) {
+ if (node->del_hw_func)
+ node->del_hw_func(node);
+ if (parent_node) {
+ /* Only root namespace doesn't have parent and we just
+ * need to free its node.
+ */
+ down_write_ref_node(parent_node);
+ list_del_init(&node->list);
+ if (node->del_sw_func)
+ node->del_sw_func(node);
+ up_write_ref_node(parent_node);
+ } else {
+ kfree(node);
+ }
+ node = NULL;
+ }
+ if (!node && parent_node)
+ tree_put_node(parent_node);
+}
+
+static int tree_remove_node(struct fs_node *node)
+{
+ if (refcount_read(&node->refcount) > 1) {
+ refcount_dec(&node->refcount);
+ return -EEXIST;
+ }
+ tree_put_node(node);
+ return 0;
+}
+
+static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio)
+{
+ struct fs_prio *iter_prio;
+
+ fs_for_each_prio(iter_prio, ns) {
+ if (iter_prio->prio == prio)
+ return iter_prio;
+ }
+
+ return NULL;
+}
+
+static bool check_valid_spec(const struct mlx5_flow_spec *spec)
+{
+ int i;
+
+ for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
+ if (spec->match_value[i] & ~spec->match_criteria[i]) {
+ pr_warn("mlx5_core: match_value differs from match_criteria\n");
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+{
+ struct fs_node *root;
+ struct mlx5_flow_namespace *ns;
+
+ root = node->root;
+
+ if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
+ pr_warn("mlx5: flow steering node is not in tree or garbaged\n");
+ return NULL;
+ }
+
+ ns = container_of(root, struct mlx5_flow_namespace, node);
+ return container_of(ns, struct mlx5_flow_root_namespace, ns);
+}
+
+static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root = find_root(node);
+
+ if (root)
+ return root->dev->priv.steering;
+ return NULL;
+}
+
+static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root = find_root(node);
+
+ if (root)
+ return root->dev;
+ return NULL;
+}
+
+static void del_sw_ns(struct fs_node *node)
+{
+ kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+ kfree(node);
+}
+
+static void del_hw_flow_table(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_core_dev *dev;
+ int err;
+
+ fs_get_obj(ft, node);
+ dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
+
+ if (node->active) {
+ err = root->cmds->destroy_flow_table(dev, ft);
+ if (err)
+ mlx5_core_warn(dev, "flow steering can't destroy ft\n");
+ }
+}
+
+static void del_sw_flow_table(struct fs_node *node)
+{
+ struct mlx5_flow_table *ft;
+ struct fs_prio *prio;
+
+ fs_get_obj(ft, node);
+
+ rhltable_destroy(&ft->fgs_hash);
+ fs_get_obj(prio, ft->node.parent);
+ prio->num_ft--;
+ kfree(ft);
+}
+
+static void del_sw_hw_rule(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_rule *rule;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int modify_mask;
+ struct mlx5_core_dev *dev = get_dev(node);
+ int err;
+ bool update_fte = false;
+
+ fs_get_obj(rule, node);
+ fs_get_obj(fte, rule->node.parent);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+ trace_mlx5_fs_del_rule(rule);
+ if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ mutex_lock(&rule->dest_attr.ft->lock);
+ list_del(&rule->next_ft);
+ mutex_unlock(&rule->dest_attr.ft->lock);
+ }
+
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
+ --fte->dests_size) {
+ modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ update_fte = true;
+ goto out;
+ }
+
+ if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ --fte->dests_size) {
+ modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ update_fte = true;
+ }
+out:
+ root = find_root(&ft->node);
+ if (update_fte && fte->dests_size) {
+ err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
+ if (err)
+ mlx5_core_warn(dev,
+ "%s can't del rule fg id=%d fte_index=%d\n",
+ __func__, fg->id, fte->index);
+ }
+ kfree(rule);
+}
+
+static void del_hw_fte(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_core_dev *dev;
+ struct fs_fte *fte;
+ int err;
+
+ fs_get_obj(fte, node);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+
+ trace_mlx5_fs_del_fte(fte);
+ dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
+ if (node->active) {
+ err = root->cmds->delete_fte(dev, ft, fte);
+ if (err)
+ mlx5_core_warn(dev,
+ "flow steering can't delete fte in index %d of flow group id %d\n",
+ fte->index, fg->id);
+ node->active = 0;
+ }
+}
+
+static void del_sw_fte(struct fs_node *node)
+{
+ struct mlx5_flow_steering *steering = get_steering(node);
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int err;
+
+ fs_get_obj(fte, node);
+ fs_get_obj(fg, fte->node.parent);
+
+ err = rhashtable_remove_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ WARN_ON(err);
+ ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
+ kmem_cache_free(steering->ftes_cache, fte);
+}
+
+static void del_hw_flow_group(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_table *ft;
+ struct mlx5_core_dev *dev;
+
+ fs_get_obj(fg, node);
+ fs_get_obj(ft, fg->node.parent);
+ dev = get_dev(&ft->node);
+ trace_mlx5_fs_del_fg(fg);
+
+ root = find_root(&ft->node);
+ if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id))
+ mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
+ fg->id, ft->id);
+}
+
+static void del_sw_flow_group(struct fs_node *node)
+{
+ struct mlx5_flow_steering *steering = get_steering(node);
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ fs_get_obj(fg, node);
+ fs_get_obj(ft, fg->node.parent);
+
+ rhashtable_destroy(&fg->ftes_hash);
+ ida_destroy(&fg->fte_allocator);
+ if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size)
+ ft->autogroup.num_groups--;
+ err = rhltable_remove(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ WARN_ON(err);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
+{
+ int index;
+ int ret;
+
+ index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ fte->index = index + fg->start_index;
+ ret = rhashtable_insert_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ if (ret)
+ goto err_ida_remove;
+
+ tree_add_node(&fte->node, &fg->node);
+ list_add_tail(&fte->node.list, &fg->node.children);
+ return 0;
+
+err_ida_remove:
+ ida_simple_remove(&fg->fte_allocator, index);
+ return ret;
+}
+
+static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
+ u32 *match_value,
+ struct mlx5_flow_act *flow_act)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct fs_fte *fte;
+
+ fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
+ if (!fte)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(fte->val, match_value, sizeof(fte->val));
+ fte->node.type = FS_TYPE_FLOW_ENTRY;
+ fte->action = *flow_act;
+
+ tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+
+ return fte;
+}
+
+static void dealloc_flow_group(struct mlx5_flow_steering *steering,
+ struct mlx5_flow_group *fg)
+{
+ rhashtable_destroy(&fg->ftes_hash);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
+ u8 match_criteria_enable,
+ void *match_criteria,
+ int start_index,
+ int end_index)
+{
+ struct mlx5_flow_group *fg;
+ int ret;
+
+ fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
+ if (!fg)
+ return ERR_PTR(-ENOMEM);
+
+ ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
+ if (ret) {
+ kmem_cache_free(steering->fgs_cache, fg);
+ return ERR_PTR(ret);
+}
+ ida_init(&fg->fte_allocator);
+ fg->mask.match_criteria_enable = match_criteria_enable;
+ memcpy(&fg->mask.match_criteria, match_criteria,
+ sizeof(fg->mask.match_criteria));
+ fg->node.type = FS_TYPE_FLOW_GROUP;
+ fg->start_index = start_index;
+ fg->max_ftes = end_index - start_index + 1;
+
+ return fg;
+}
+
+static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ void *match_criteria,
+ int start_index,
+ int end_index,
+ struct list_head *prev)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *fg;
+ int ret;
+
+ fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
+ start_index, end_index);
+ if (IS_ERR(fg))
+ return fg;
+
+ /* initialize refcnt, add to parent list */
+ ret = rhltable_insert(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ if (ret) {
+ dealloc_flow_group(steering, fg);
+ return ERR_PTR(ret);
+ }
+
+ tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
+ tree_add_node(&fg->node, &ft->node);
+ /* Add node to group list */
+ list_add(&fg->node.list, prev);
+ atomic_inc(&ft->node.version);
+
+ return fg;
+}
+
+static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
+ enum fs_flow_table_type table_type,
+ enum fs_flow_table_op_mod op_mod,
+ u32 flags)
+{
+ struct mlx5_flow_table *ft;
+ int ret;
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
+ if (ret) {
+ kfree(ft);
+ return ERR_PTR(ret);
+ }
+
+ ft->level = level;
+ ft->node.type = FS_TYPE_FLOW_TABLE;
+ ft->op_mod = op_mod;
+ ft->type = table_type;
+ ft->vport = vport;
+ ft->max_fte = max_fte;
+ ft->flags = flags;
+ INIT_LIST_HEAD(&ft->fwd_rules);
+ mutex_init(&ft->lock);
+
+ return ft;
+}
+
+/* If reverse is false, then we search for the first flow table in the
+ * root sub-tree from start(closest from right), else we search for the
+ * last flow table in the root sub-tree till start(closest from left).
+ */
+static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
+ struct list_head *start,
+ bool reverse)
+{
+#define list_advance_entry(pos, reverse) \
+ ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
+
+#define list_for_each_advance_continue(pos, head, reverse) \
+ for (pos = list_advance_entry(pos, reverse); \
+ &pos->list != (head); \
+ pos = list_advance_entry(pos, reverse))
+
+ struct fs_node *iter = list_entry(start, struct fs_node, list);
+ struct mlx5_flow_table *ft = NULL;
+
+ if (!root)
+ return NULL;
+
+ list_for_each_advance_continue(iter, &root->children, reverse) {
+ if (iter->type == FS_TYPE_FLOW_TABLE) {
+ fs_get_obj(ft, iter);
+ return ft;
+ }
+ ft = find_closest_ft_recursive(iter, &iter->children, reverse);
+ if (ft)
+ return ft;
+ }
+
+ return ft;
+}
+
+/* If reverse if false then return the first flow table in next priority of
+ * prio in the tree, else return the last flow table in the previous priority
+ * of prio in the tree.
+ */
+static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
+{
+ struct mlx5_flow_table *ft = NULL;
+ struct fs_node *curr_node;
+ struct fs_node *parent;
+
+ parent = prio->node.parent;
+ curr_node = &prio->node;
+ while (!ft && parent) {
+ ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
+ curr_node = parent;
+ parent = curr_node->parent;
+ }
+ return ft;
+}
+
+/* Assuming all the tree is locked by mutex chain lock */
+static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
+{
+ return find_closest_ft(prio, false);
+}
+
+/* Assuming all the tree is locked by mutex chain lock */
+static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
+{
+ return find_closest_ft(prio, true);
+}
+
+static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+ struct fs_prio *prio,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ struct mlx5_flow_table *iter;
+ int i = 0;
+ int err;
+
+ fs_for_each_ft(iter, prio) {
+ i++;
+ err = root->cmds->modify_flow_table(dev, iter, ft);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to modify flow table %d\n",
+ iter->id);
+ /* The driver is out of sync with the FW */
+ if (i > 1)
+ WARN_ON(true);
+ return err;
+ }
+ }
+ return 0;
+}
+
+/* Connect flow tables from previous priority of prio to ft */
+static int connect_prev_fts(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct mlx5_flow_table *prev_ft;
+
+ prev_ft = find_prev_chained_ft(prio);
+ if (prev_ft) {
+ struct fs_prio *prev_prio;
+
+ fs_get_obj(prev_prio, prev_ft->node.parent);
+ return connect_fts_in_prio(dev, prev_prio, ft);
+ }
+ return 0;
+}
+
+static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
+ *prio)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ struct mlx5_ft_underlay_qp *uqp;
+ int min_level = INT_MAX;
+ int err;
+ u32 qpn;
+
+ if (root->root_ft)
+ min_level = root->root_ft->level;
+
+ if (ft->level >= min_level)
+ return 0;
+
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case QPN list is empty */
+ qpn = 0;
+ err = root->cmds->update_root_ft(root->dev, ft, qpn, false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = root->cmds->update_root_ft(root->dev, ft,
+ qpn, false);
+ if (err)
+ break;
+ }
+ }
+
+ if (err)
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
+ else
+ root->root_ft = ft;
+
+ return err;
+}
+
+static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ int err = 0;
+
+ fs_get_obj(fte, rule->node.parent);
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return -EINVAL;
+ down_write_ref_node(&fte->node);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ root = find_root(&ft->node);
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
+ up_write_ref_node(&fte->node);
+
+ return err;
+}
+
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
+ struct mlx5_flow_destination *new_dest,
+ struct mlx5_flow_destination *old_dest)
+{
+ int i;
+
+ if (!old_dest) {
+ if (handle->num_rules != 1)
+ return -EINVAL;
+ return _mlx5_modify_rule_destination(handle->rule[0],
+ new_dest);
+ }
+
+ for (i = 0; i < handle->num_rules; i++) {
+ if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
+ return _mlx5_modify_rule_destination(handle->rule[i],
+ new_dest);
+ }
+
+ return -EINVAL;
+}
+
+/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */
+static int connect_fwd_rules(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *new_next_ft,
+ struct mlx5_flow_table *old_next_ft)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_rule *iter;
+ int err = 0;
+
+ /* new_next_ft and old_next_ft could be NULL only
+ * when we create/destroy the anchor flow table.
+ */
+ if (!new_next_ft || !old_next_ft)
+ return 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = new_next_ft;
+
+ mutex_lock(&old_next_ft->lock);
+ list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
+ mutex_unlock(&old_next_ft->lock);
+ list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
+ err = _mlx5_modify_rule_destination(iter, &dest);
+ if (err)
+ pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
+ new_next_ft->id);
+ }
+ return 0;
+}
+
+static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct mlx5_flow_table *next_ft, *first_ft;
+ int err = 0;
+
+ /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
+
+ first_ft = list_first_entry_or_null(&prio->node.children,
+ struct mlx5_flow_table, node.list);
+ if (!first_ft || first_ft->level > ft->level) {
+ err = connect_prev_fts(dev, ft, prio);
+ if (err)
+ return err;
+
+ next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
+ err = connect_fwd_rules(dev, ft, next_ft);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.modify_root))
+ err = update_root_ft_create(ft, prio);
+ return err;
+}
+
+static void list_add_flow_table(struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct list_head *prev = &prio->node.children;
+ struct mlx5_flow_table *iter;
+
+ fs_for_each_ft(iter, prio) {
+ if (iter->level > ft->level)
+ break;
+ prev = &iter->node.list;
+ }
+ list_add(&ft->node.list, prev);
+}
+
+static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr,
+ enum fs_flow_table_op_mod op_mod,
+ u16 vport)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ns->node);
+ struct mlx5_flow_table *next_ft = NULL;
+ struct fs_prio *fs_prio = NULL;
+ struct mlx5_flow_table *ft;
+ int log_table_sz;
+ int err;
+
+ if (!root) {
+ pr_err("mlx5: flow steering failed to find root of namespace\n");
+ return ERR_PTR(-ENODEV);
+ }
+
+ mutex_lock(&root->chain_lock);
+ fs_prio = find_prio(ns, ft_attr->prio);
+ if (!fs_prio) {
+ err = -EINVAL;
+ goto unlock_root;
+ }
+ if (ft_attr->level >= fs_prio->num_levels) {
+ err = -ENOSPC;
+ goto unlock_root;
+ }
+ /* The level is related to the
+ * priority level range.
+ */
+ ft_attr->level += fs_prio->start_level;
+ ft = alloc_flow_table(ft_attr->level,
+ vport,
+ ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
+ root->table_type,
+ op_mod, ft_attr->flags);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto unlock_root;
+ }
+
+ tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
+ log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
+ next_ft = find_next_chained_ft(fs_prio);
+ err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod,
+ ft->type, ft->level, log_table_sz,
+ next_ft, &ft->id, ft->flags);
+ if (err)
+ goto free_ft;
+
+ err = connect_flow_table(root->dev, ft, fs_prio);
+ if (err)
+ goto destroy_ft;
+ ft->node.active = true;
+ down_write_ref_node(&fs_prio->node);
+ tree_add_node(&ft->node, &fs_prio->node);
+ list_add_flow_table(ft, fs_prio);
+ fs_prio->num_ft++;
+ up_write_ref_node(&fs_prio->node);
+ mutex_unlock(&root->chain_lock);
+ return ft;
+destroy_ft:
+ root->cmds->destroy_flow_table(root->dev, ft);
+free_ft:
+ rhltable_destroy(&ft->fgs_hash);
+ kfree(ft);
+unlock_root:
+ mutex_unlock(&root->chain_lock);
+ return ERR_PTR(err);
+}
+
+struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr)
+{
+ return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
+}
+
+struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
+ int prio, int max_fte,
+ u32 level, u16 vport)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+
+ ft_attr.max_fte = max_fte;
+ ft_attr.level = level;
+ ft_attr.prio = prio;
+
+ return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
+}
+
+struct mlx5_flow_table*
+mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
+ int prio, u32 level)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+
+ ft_attr.level = level;
+ ft_attr.prio = prio;
+ return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
+}
+EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
+
+struct mlx5_flow_table*
+mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
+ int prio,
+ int num_flow_table_entries,
+ int max_num_groups,
+ u32 level,
+ u32 flags)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ if (max_num_groups > num_flow_table_entries)
+ return ERR_PTR(-EINVAL);
+
+ ft_attr.max_fte = num_flow_table_entries;
+ ft_attr.prio = prio;
+ ft_attr.level = level;
+ ft_attr.flags = flags;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return ft;
+
+ ft->autogroup.active = true;
+ ft->autogroup.required_groups = max_num_groups;
+ /* We save place for flow groups in addition to max types */
+ ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
+
+ return ft;
+}
+EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
+
+struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
+ u32 *fg_in)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ fg_in, match_criteria);
+ u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+ fg_in,
+ match_criteria_enable);
+ int start_index = MLX5_GET(create_flow_group_in, fg_in,
+ start_flow_index);
+ int end_index = MLX5_GET(create_flow_group_in, fg_in,
+ end_flow_index);
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ struct mlx5_flow_group *fg;
+ int err;
+
+ if (ft->autogroup.active)
+ return ERR_PTR(-EPERM);
+
+ down_write_ref_node(&ft->node);
+ fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
+ start_index, end_index,
+ ft->node.children.prev);
+ up_write_ref_node(&ft->node);
+ if (IS_ERR(fg))
+ return fg;
+
+ err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
+ if (err) {
+ tree_put_node(&fg->node);
+ return ERR_PTR(err);
+ }
+ trace_mlx5_fs_add_fg(fg);
+ fg->node.active = true;
+
+ return fg;
+}
+
+static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return NULL;
+
+ INIT_LIST_HEAD(&rule->next_ft);
+ rule->node.type = FS_TYPE_FLOW_DEST;
+ if (dest)
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
+
+ return rule;
+}
+
+static struct mlx5_flow_handle *alloc_handle(int num_rules)
+{
+ struct mlx5_flow_handle *handle;
+
+ handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
+ if (!handle)
+ return NULL;
+
+ handle->num_rules = num_rules;
+
+ return handle;
+}
+
+static void destroy_flow_handle(struct fs_fte *fte,
+ struct mlx5_flow_handle *handle,
+ struct mlx5_flow_destination *dest,
+ int i)
+{
+ for (; --i >= 0;) {
+ if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
+ fte->dests_size--;
+ list_del(&handle->rule[i]->node.list);
+ kfree(handle->rule[i]);
+ }
+ }
+ kfree(handle);
+}
+
+static struct mlx5_flow_handle *
+create_flow_handle(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ int *modify_mask,
+ bool *new_rule)
+{
+ struct mlx5_flow_handle *handle;
+ struct mlx5_flow_rule *rule = NULL;
+ static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ int type;
+ int i = 0;
+
+ handle = alloc_handle((dest_num) ? dest_num : 1);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+
+ do {
+ if (dest) {
+ rule = find_flow_rule(fte, dest + i);
+ if (rule) {
+ refcount_inc(&rule->node.refcount);
+ goto rule_found;
+ }
+ }
+
+ *new_rule = true;
+ rule = alloc_rule(dest + i);
+ if (!rule)
+ goto free_rules;
+
+ /* Add dest to dests list- we need flow tables to be in the
+ * end of the list for forward to next prio rules.
+ */
+ tree_init_node(&rule->node, NULL, del_sw_hw_rule);
+ if (dest &&
+ dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ list_add(&rule->node.list, &fte->node.children);
+ else
+ list_add_tail(&rule->node.list, &fte->node.children);
+ if (dest) {
+ fte->dests_size++;
+
+ type = dest[i].type ==
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ *modify_mask |= type ? count : dst;
+ }
+rule_found:
+ handle->rule[i] = rule;
+ } while (++i < dest_num);
+
+ return handle;
+
+free_rules:
+ destroy_flow_handle(fte, handle, dest, i);
+ return ERR_PTR(-ENOMEM);
+}
+
+/* fte should not be deleted while calling this function */
+static struct mlx5_flow_handle *
+add_rule_fte(struct fs_fte *fte,
+ struct mlx5_flow_group *fg,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ bool update_action)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_flow_table *ft;
+ int modify_mask = 0;
+ int err;
+ bool new_rule = false;
+
+ handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
+ &new_rule);
+ if (IS_ERR(handle) || !new_rule)
+ goto out;
+
+ if (update_action)
+ modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+
+ fs_get_obj(ft, fg->node.parent);
+ root = find_root(&fg->node);
+ if (!(fte->status & FS_FTE_STATUS_EXISTING))
+ err = root->cmds->create_fte(get_dev(&ft->node),
+ ft, fg, fte);
+ else
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
+ if (err)
+ goto free_handle;
+
+ fte->node.active = true;
+ fte->status |= FS_FTE_STATUS_EXISTING;
+ atomic_inc(&fte->node.version);
+
+out:
+ return handle;
+
+free_handle:
+ destroy_flow_handle(fte, handle, dest, handle->num_rules);
+ return ERR_PTR(err);
+}
+
+static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec)
+{
+ struct list_head *prev = &ft->node.children;
+ struct mlx5_flow_group *fg;
+ unsigned int candidate_index = 0;
+ unsigned int group_size = 0;
+
+ if (!ft->autogroup.active)
+ return ERR_PTR(-ENOENT);
+
+ if (ft->autogroup.num_groups < ft->autogroup.required_groups)
+ group_size = ft->autogroup.group_size;
+
+ /* ft->max_fte == ft->autogroup.max_types */
+ if (group_size == 0)
+ group_size = 1;
+
+ /* sorted by start_index */
+ fs_for_each_fg(fg, ft) {
+ if (candidate_index + group_size > fg->start_index)
+ candidate_index = fg->start_index + fg->max_ftes;
+ else
+ break;
+ prev = &fg->node.list;
+ }
+
+ if (candidate_index + group_size > ft->max_fte)
+ return ERR_PTR(-ENOSPC);
+
+ fg = alloc_insert_flow_group(ft,
+ spec->match_criteria_enable,
+ spec->match_criteria,
+ candidate_index,
+ candidate_index + group_size - 1,
+ prev);
+ if (IS_ERR(fg))
+ goto out;
+
+ if (group_size == ft->autogroup.group_size)
+ ft->autogroup.num_groups++;
+
+out:
+ return fg;
+}
+
+static int create_auto_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *match_criteria_addr;
+ u8 src_esw_owner_mask_on;
+ void *misc;
+ int err;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ fg->mask.match_criteria_enable);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index +
+ fg->max_ftes - 1);
+
+ misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria,
+ misc_parameters);
+ src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, in,
+ source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on);
+
+ match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
+ in, match_criteria);
+ memcpy(match_criteria_addr, fg->mask.match_criteria,
+ sizeof(fg->mask.match_criteria));
+
+ err = root->cmds->create_flow_group(dev, ft, in, &fg->id);
+ if (!err) {
+ fg->node.active = true;
+ trace_mlx5_fs_add_fg(fg);
+ }
+
+ kvfree(in);
+ return err;
+}
+
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+ struct mlx5_flow_destination *d2)
+{
+ if (d1->type == d2->type) {
+ if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ d1->vport.num == d2->vport.num) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ d1->ft == d2->ft) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ d1->tir_num == d2->tir_num) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
+ d1->ft_num == d2->ft_num))
+ return true;
+ }
+
+ return false;
+}
+
+static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+
+ list_for_each_entry(rule, &fte->node.children, node.list) {
+ if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
+ return rule;
+ }
+ return NULL;
+}
+
+static bool check_conflicting_actions(u32 action1, u32 action2)
+{
+ u32 xored_actions = action1 ^ action2;
+
+ /* if one rule only wants to count, it's ok */
+ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
+ action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return false;
+
+ if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
+ return true;
+
+ return false;
+}
+
+static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+{
+ if (check_conflicting_actions(flow_act->action, fte->action.action)) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "Found two FTEs with conflicting actions\n");
+ return -EEXIST;
+ }
+
+ if (flow_act->has_flow_tag &&
+ fte->action.flow_tag != flow_act->flow_tag) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "FTE flow tag %u already exists with different flow tag %u\n",
+ fte->action.flow_tag,
+ flow_act->flow_tag);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
+ u32 *match_value,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ struct fs_fte *fte)
+{
+ struct mlx5_flow_handle *handle;
+ int old_action;
+ int i;
+ int ret;
+
+ ret = check_conflicting_ftes(fte, flow_act);
+ if (ret)
+ return ERR_PTR(ret);
+
+ old_action = fte->action.action;
+ fte->action.action |= flow_act->action;
+ handle = add_rule_fte(fte, fg, dest, dest_num,
+ old_action != flow_act->action);
+ if (IS_ERR(handle)) {
+ fte->action.action = old_action;
+ return handle;
+ }
+ trace_mlx5_fs_set_fte(fte, false);
+
+ for (i = 0; i < handle->num_rules; i++) {
+ if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ tree_add_node(&handle->rule[i]->node, &fte->node);
+ trace_mlx5_fs_add_rule(handle->rule[i]);
+ }
+ }
+ return handle;
+}
+
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
+{
+ struct mlx5_flow_rule *dst;
+ struct fs_fte *fte;
+
+ fs_get_obj(fte, handle->rule[0]->node.parent);
+
+ fs_for_each_dst(dst, fte) {
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ return dst->dest_attr.counter;
+ }
+
+ return NULL;
+}
+
+static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
+{
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
+ return !counter;
+
+ if (!counter)
+ return false;
+
+ return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
+}
+
+static bool dest_is_valid(struct mlx5_flow_destination *dest,
+ u32 action,
+ struct mlx5_flow_table *ft)
+{
+ if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
+ return counter_is_valid(dest->counter, action);
+
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return true;
+
+ if (!dest || ((dest->type ==
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
+ (dest->ft->level <= ft->level)))
+ return false;
+ return true;
+}
+
+struct match_list {
+ struct list_head list;
+ struct mlx5_flow_group *g;
+};
+
+struct match_list_head {
+ struct list_head list;
+ struct match_list first;
+};
+
+static void free_match_list(struct match_list_head *head)
+{
+ if (!list_empty(&head->list)) {
+ struct match_list *iter, *match_tmp;
+
+ list_del(&head->first.list);
+ tree_put_node(&head->first.g->node);
+ list_for_each_entry_safe(iter, match_tmp, &head->list,
+ list) {
+ tree_put_node(&iter->g->node);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+ }
+}
+
+static int build_match_list(struct match_list_head *match_head,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec)
+{
+ struct rhlist_head *tmp, *list;
+ struct mlx5_flow_group *g;
+ int err = 0;
+
+ rcu_read_lock();
+ INIT_LIST_HEAD(&match_head->list);
+ /* Collect all fgs which has a matching match_criteria */
+ list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
+ /* RCU is atomic, we can't execute FW commands here */
+ rhl_for_each_entry_rcu(g, tmp, list, hash) {
+ struct match_list *curr_match;
+
+ if (likely(list_empty(&match_head->list))) {
+ if (!tree_get_node(&g->node))
+ continue;
+ match_head->first.g = g;
+ list_add_tail(&match_head->first.list,
+ &match_head->list);
+ continue;
+ }
+
+ curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
+ if (!curr_match) {
+ rcu_read_unlock();
+ free_match_list(match_head);
+ return -ENOMEM;
+ }
+ if (!tree_get_node(&g->node)) {
+ kfree(curr_match);
+ continue;
+ }
+ curr_match->g = g;
+ list_add_tail(&curr_match->list, &match_head->list);
+ }
+ rcu_read_unlock();
+ return err;
+}
+
+static u64 matched_fgs_get_version(struct list_head *match_head)
+{
+ struct match_list *iter;
+ u64 version = 0;
+
+ list_for_each_entry(iter, match_head, list)
+ version += (u64)atomic_read(&iter->g->node.version);
+ return version;
+}
+
+static struct fs_fte *
+lookup_fte_locked(struct mlx5_flow_group *g,
+ u32 *match_value,
+ bool take_write)
+{
+ struct fs_fte *fte_tmp;
+
+ if (take_write)
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ else
+ nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
+ fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
+ rhash_fte);
+ if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+ fte_tmp = NULL;
+ goto out;
+ }
+ if (!fte_tmp->node.active) {
+ tree_put_node(&fte_tmp->node);
+ fte_tmp = NULL;
+ goto out;
+ }
+
+ nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+out:
+ if (take_write)
+ up_write_ref_node(&g->node);
+ else
+ up_read_ref_node(&g->node);
+ return fte_tmp;
+}
+
+static struct mlx5_flow_handle *
+try_add_to_existing_fg(struct mlx5_flow_table *ft,
+ struct list_head *match_head,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ int ft_version)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *g;
+ struct mlx5_flow_handle *rule;
+ struct match_list *iter;
+ bool take_write = false;
+ struct fs_fte *fte;
+ u64 version;
+ int err;
+
+ fte = alloc_fte(ft, spec->match_value, flow_act);
+ if (IS_ERR(fte))
+ return ERR_PTR(-ENOMEM);
+
+search_again_locked:
+ version = matched_fgs_get_version(match_head);
+ /* Try to find a fg that already contains a matching fte */
+ list_for_each_entry(iter, match_head, list) {
+ struct fs_fte *fte_tmp;
+
+ g = iter->g;
+ fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
+ if (!fte_tmp)
+ continue;
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, fte_tmp);
+ up_write_ref_node(&fte_tmp->node);
+ tree_put_node(&fte_tmp->node);
+ kmem_cache_free(steering->ftes_cache, fte);
+ return rule;
+ }
+
+ /* Check the ft version, for case that new flow group
+ * was added while the fgs weren't locked
+ */
+ if (atomic_read(&ft->node.version) != ft_version) {
+ rule = ERR_PTR(-EAGAIN);
+ goto out;
+ }
+
+ /* Check the fgs version, for case the new FTE with the
+ * same values was added while the fgs weren't locked
+ */
+ if (version != matched_fgs_get_version(match_head)) {
+ take_write = true;
+ goto search_again_locked;
+ }
+
+ list_for_each_entry(iter, match_head, list) {
+ g = iter->g;
+
+ if (!g->node.active)
+ continue;
+
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+
+ err = insert_fte(g, fte);
+ if (err) {
+ up_write_ref_node(&g->node);
+ if (err == -ENOSPC)
+ continue;
+ kmem_cache_free(steering->ftes_cache, fte);
+ return ERR_PTR(err);
+ }
+
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ up_write_ref_node(&g->node);
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, fte);
+ up_write_ref_node(&fte->node);
+ tree_put_node(&fte->node);
+ return rule;
+ }
+ rule = ERR_PTR(-ENOENT);
+out:
+ kmem_cache_free(steering->ftes_cache, fte);
+ return rule;
+}
+
+static struct mlx5_flow_handle *
+_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num)
+
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *g;
+ struct mlx5_flow_handle *rule;
+ struct match_list_head match_head;
+ bool take_write = false;
+ struct fs_fte *fte;
+ int version;
+ int err;
+ int i;
+
+ if (!check_valid_spec(spec))
+ return ERR_PTR(-EINVAL);
+
+ for (i = 0; i < dest_num; i++) {
+ if (!dest_is_valid(&dest[i], flow_act->action, ft))
+ return ERR_PTR(-EINVAL);
+ }
+ nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+search_again_locked:
+ version = atomic_read(&ft->node.version);
+
+ /* Collect all fgs which has a matching match_criteria */
+ err = build_match_list(&match_head, ft, spec);
+ if (err) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
+ else
+ up_read_ref_node(&ft->node);
+ return ERR_PTR(err);
+ }
+
+ if (!take_write)
+ up_read_ref_node(&ft->node);
+
+ rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
+ dest_num, version);
+ free_match_list(&match_head);
+ if (!IS_ERR(rule) ||
+ (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
+ return rule;
+ }
+
+ if (!take_write) {
+ nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+ take_write = true;
+ }
+
+ if (PTR_ERR(rule) == -EAGAIN ||
+ version != atomic_read(&ft->node.version))
+ goto search_again_locked;
+
+ g = alloc_auto_flow_group(ft, spec);
+ if (IS_ERR(g)) {
+ rule = ERR_CAST(g);
+ up_write_ref_node(&ft->node);
+ return rule;
+ }
+
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ up_write_ref_node(&ft->node);
+
+ err = create_auto_flow_group(ft, g);
+ if (err)
+ goto err_release_fg;
+
+ fte = alloc_fte(ft, spec->match_value, flow_act);
+ if (IS_ERR(fte)) {
+ err = PTR_ERR(fte);
+ goto err_release_fg;
+ }
+
+ err = insert_fte(g, fte);
+ if (err) {
+ kmem_cache_free(steering->ftes_cache, fte);
+ goto err_release_fg;
+ }
+
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ up_write_ref_node(&g->node);
+ rule = add_rule_fg(g, spec->match_value, flow_act, dest,
+ dest_num, fte);
+ up_write_ref_node(&fte->node);
+ tree_put_node(&fte->node);
+ tree_put_node(&g->node);
+ return rule;
+
+err_release_fg:
+ up_write_ref_node(&g->node);
+ tree_put_node(&g->node);
+ return ERR_PTR(err);
+}
+
+static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
+{
+ return ((ft->type == FS_FT_NIC_RX) &&
+ (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
+}
+
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int num_dest)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_flow_destination gen_dest = {};
+ struct mlx5_flow_table *next_ft = NULL;
+ struct mlx5_flow_handle *handle = NULL;
+ u32 sw_action = flow_act->action;
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+ if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ if (!fwd_next_prio_supported(ft))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (num_dest)
+ return ERR_PTR(-EINVAL);
+ mutex_lock(&root->chain_lock);
+ next_ft = find_next_chained_ft(prio);
+ if (next_ft) {
+ gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ gen_dest.ft = next_ft;
+ dest = &gen_dest;
+ num_dest = 1;
+ flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else {
+ mutex_unlock(&root->chain_lock);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ }
+
+ handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+
+ if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ if (!IS_ERR_OR_NULL(handle) &&
+ (list_empty(&handle->rule[0]->next_ft))) {
+ mutex_lock(&next_ft->lock);
+ list_add(&handle->rule[0]->next_ft,
+ &next_ft->fwd_rules);
+ mutex_unlock(&next_ft->lock);
+ handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ }
+ mutex_unlock(&root->chain_lock);
+ }
+ return handle;
+}
+EXPORT_SYMBOL(mlx5_add_flow_rules);
+
+void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
+{
+ int i;
+
+ for (i = handle->num_rules - 1; i >= 0; i--)
+ tree_remove_node(&handle->rule[i]->node);
+ kfree(handle);
+}
+EXPORT_SYMBOL(mlx5_del_flow_rules);
+
+/* Assuming prio->node.children(flow tables) is sorted by level */
+static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
+{
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+
+ if (!list_is_last(&ft->node.list, &prio->node.children))
+ return list_next_entry(ft, node.list);
+ return find_next_chained_ft(prio);
+}
+
+static int update_root_ft_destroy(struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_ft_underlay_qp *uqp;
+ struct mlx5_flow_table *new_root_ft = NULL;
+ int err = 0;
+ u32 qpn;
+
+ if (root->root_ft != ft)
+ return 0;
+
+ new_root_ft = find_next_ft(ft);
+ if (!new_root_ft) {
+ root->root_ft = NULL;
+ return 0;
+ }
+
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case QPN list is empty */
+ qpn = 0;
+ err = root->cmds->update_root_ft(root->dev, new_root_ft,
+ qpn, false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = root->cmds->update_root_ft(root->dev,
+ new_root_ft, qpn,
+ false);
+ if (err)
+ break;
+ }
+ }
+
+ if (err)
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
+ else
+ root->root_ft = new_root_ft;
+
+ return 0;
+}
+
+/* Connect flow table from previous priority to
+ * the next flow table.
+ */
+static int disconnect_flow_table(struct mlx5_flow_table *ft)
+{
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ struct mlx5_flow_table *next_ft;
+ struct fs_prio *prio;
+ int err = 0;
+
+ err = update_root_ft_destroy(ft);
+ if (err)
+ return err;
+
+ fs_get_obj(prio, ft->node.parent);
+ if (!(list_first_entry(&prio->node.children,
+ struct mlx5_flow_table,
+ node.list) == ft))
+ return 0;
+
+ next_ft = find_next_ft(ft);
+ err = connect_fwd_rules(dev, next_ft, ft);
+ if (err)
+ return err;
+
+ err = connect_prev_fts(dev, next_ft, prio);
+ if (err)
+ mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
+ ft->id);
+ return err;
+}
+
+int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ err = disconnect_flow_table(ft);
+ if (err) {
+ mutex_unlock(&root->chain_lock);
+ return err;
+ }
+ if (tree_remove_node(&ft->node))
+ mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
+ ft->id);
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_destroy_flow_table);
+
+void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
+{
+ if (tree_remove_node(&fg->node))
+ mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
+ fg->id);
+}
+
+struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ struct mlx5_flow_root_namespace *root_ns;
+ int prio;
+ struct fs_prio *fs_prio;
+ struct mlx5_flow_namespace *ns;
+
+ if (!steering)
+ return NULL;
+
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ case MLX5_FLOW_NAMESPACE_LAG:
+ case MLX5_FLOW_NAMESPACE_OFFLOADS:
+ case MLX5_FLOW_NAMESPACE_ETHTOOL:
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+ case MLX5_FLOW_NAMESPACE_ANCHOR:
+ prio = type;
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB:
+ if (steering->fdb_root_ns)
+ return &steering->fdb_root_ns->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
+ if (steering->sniffer_rx_root_ns)
+ return &steering->sniffer_rx_root_ns->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
+ if (steering->sniffer_tx_root_ns)
+ return &steering->sniffer_tx_root_ns->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ if (steering->egress_root_ns)
+ return &steering->egress_root_ns->ns;
+ else
+ return NULL;
+ default:
+ return NULL;
+ }
+
+ root_ns = steering->root_ns;
+ if (!root_ns)
+ return NULL;
+
+ fs_prio = find_prio(&root_ns->ns, prio);
+ if (!fs_prio)
+ return NULL;
+
+ ns = list_first_entry(&fs_prio->node.children,
+ typeof(*ns),
+ node.list);
+
+ return ns;
+}
+EXPORT_SYMBOL(mlx5_get_flow_namespace);
+
+struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ int vport)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ if (!steering || vport >= MLX5_TOTAL_VPORTS(dev))
+ return NULL;
+
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
+ if (steering->esw_egress_root_ns &&
+ steering->esw_egress_root_ns[vport])
+ return &steering->esw_egress_root_ns[vport]->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ if (steering->esw_ingress_root_ns &&
+ steering->esw_ingress_root_ns[vport])
+ return &steering->esw_ingress_root_ns[vport]->ns;
+ else
+ return NULL;
+ default:
+ return NULL;
+ }
+}
+
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio, int num_levels)
+{
+ struct fs_prio *fs_prio;
+
+ fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
+ if (!fs_prio)
+ return ERR_PTR(-ENOMEM);
+
+ fs_prio->node.type = FS_TYPE_PRIO;
+ tree_init_node(&fs_prio->node, NULL, del_sw_prio);
+ tree_add_node(&fs_prio->node, &ns->node);
+ fs_prio->num_levels = num_levels;
+ fs_prio->prio = prio;
+ list_add_tail(&fs_prio->node.list, &ns->node.children);
+
+ return fs_prio;
+}
+
+static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
+ *ns)
+{
+ ns->node.type = FS_TYPE_NAMESPACE;
+
+ return ns;
+}
+
+static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
+{
+ struct mlx5_flow_namespace *ns;
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns)
+ return ERR_PTR(-ENOMEM);
+
+ fs_init_namespace(ns);
+ tree_init_node(&ns->node, NULL, del_sw_ns);
+ tree_add_node(&ns->node, &prio->node);
+ list_add_tail(&ns->node.list, &prio->node.children);
+
+ return ns;
+}
+
+static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
+ struct init_tree_node *prio_metadata)
+{
+ struct fs_prio *fs_prio;
+ int i;
+
+ for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
+ fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
+ if (IS_ERR(fs_prio))
+ return PTR_ERR(fs_prio);
+ }
+ return 0;
+}
+
+#define FLOW_TABLE_BIT_SZ 1
+#define GET_FLOW_TABLE_CAP(dev, offset) \
+ ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \
+ offset / 32)) >> \
+ (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
+static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
+{
+ int i;
+
+ for (i = 0; i < caps->arr_sz; i++) {
+ if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
+ return false;
+ }
+ return true;
+}
+
+static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
+ struct init_tree_node *init_node,
+ struct fs_node *fs_parent_node,
+ struct init_tree_node *init_parent_node,
+ int prio)
+{
+ int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
+ flow_table_properties_nic_receive.
+ max_ft_level);
+ struct mlx5_flow_namespace *fs_ns;
+ struct fs_prio *fs_prio;
+ struct fs_node *base;
+ int i;
+ int err;
+
+ if (init_node->type == FS_TYPE_PRIO) {
+ if ((init_node->min_ft_level > max_ft_level) ||
+ !has_required_caps(steering->dev, &init_node->caps))
+ return 0;
+
+ fs_get_obj(fs_ns, fs_parent_node);
+ if (init_node->num_leaf_prios)
+ return create_leaf_prios(fs_ns, prio, init_node);
+ fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
+ if (IS_ERR(fs_prio))
+ return PTR_ERR(fs_prio);
+ base = &fs_prio->node;
+ } else if (init_node->type == FS_TYPE_NAMESPACE) {
+ fs_get_obj(fs_prio, fs_parent_node);
+ fs_ns = fs_create_namespace(fs_prio);
+ if (IS_ERR(fs_ns))
+ return PTR_ERR(fs_ns);
+ base = &fs_ns->node;
+ } else {
+ return -EINVAL;
+ }
+ prio = 0;
+ for (i = 0; i < init_node->ar_size; i++) {
+ err = init_root_tree_recursive(steering, &init_node->children[i],
+ base, init_node, prio);
+ if (err)
+ return err;
+ if (init_node->children[i].type == FS_TYPE_PRIO &&
+ init_node->children[i].num_leaf_prios) {
+ prio += init_node->children[i].num_leaf_prios;
+ }
+ }
+
+ return 0;
+}
+
+static int init_root_tree(struct mlx5_flow_steering *steering,
+ struct init_tree_node *init_node,
+ struct fs_node *fs_parent_node)
+{
+ int i;
+ struct mlx5_flow_namespace *fs_ns;
+ int err;
+
+ fs_get_obj(fs_ns, fs_parent_node);
+ for (i = 0; i < init_node->ar_size; i++) {
+ err = init_root_tree_recursive(steering, &init_node->children[i],
+ &fs_ns->node,
+ init_node, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static struct mlx5_flow_root_namespace
+*create_root_ns(struct mlx5_flow_steering *steering,
+ enum fs_flow_table_type table_type)
+{
+ const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_namespace *ns;
+
+ if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+ (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
+ cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
+
+ /* Create the root namespace */
+ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
+ if (!root_ns)
+ return NULL;
+
+ root_ns->dev = steering->dev;
+ root_ns->table_type = table_type;
+ root_ns->cmds = cmds;
+
+ INIT_LIST_HEAD(&root_ns->underlay_qpns);
+
+ ns = &root_ns->ns;
+ fs_init_namespace(ns);
+ mutex_init(&root_ns->chain_lock);
+ tree_init_node(&ns->node, NULL, NULL);
+ tree_add_node(&ns->node, NULL);
+
+ return root_ns;
+}
+
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
+
+static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
+{
+ struct fs_prio *prio;
+
+ fs_for_each_prio(prio, ns) {
+ /* This updates prio start_level and num_levels */
+ set_prio_attrs_in_prio(prio, acc_level);
+ acc_level += prio->num_levels;
+ }
+ return acc_level;
+}
+
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
+{
+ struct mlx5_flow_namespace *ns;
+ int acc_level_ns = acc_level;
+
+ prio->start_level = acc_level;
+ fs_for_each_ns(ns, prio)
+ /* This updates start_level and num_levels of ns's priority descendants */
+ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
+ if (!prio->num_levels)
+ prio->num_levels = acc_level_ns - prio->start_level;
+ WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
+}
+
+static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
+{
+ struct mlx5_flow_namespace *ns = &root_ns->ns;
+ struct fs_prio *prio;
+ int start_level = 0;
+
+ fs_for_each_prio(prio, ns) {
+ set_prio_attrs_in_prio(prio, start_level);
+ start_level += prio->num_levels;
+ }
+}
+
+#define ANCHOR_PRIO 0
+#define ANCHOR_SIZE 1
+#define ANCHOR_LEVEL 0
+static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
+ if (WARN_ON(!ns))
+ return -EINVAL;
+
+ ft_attr.max_fte = ANCHOR_SIZE;
+ ft_attr.level = ANCHOR_LEVEL;
+ ft_attr.prio = ANCHOR_PRIO;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
+ return PTR_ERR(ft);
+ }
+ return 0;
+}
+
+static int init_root_ns(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
+ if (!steering->root_ns)
+ return -ENOMEM;
+
+ err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
+ if (err)
+ goto out_err;
+
+ set_prio_attrs(steering->root_ns);
+ err = create_anchor_flow_table(steering);
+ if (err)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->root_ns);
+ steering->root_ns = NULL;
+ return err;
+}
+
+static void clean_tree(struct fs_node *node)
+{
+ if (node) {
+ struct fs_node *iter;
+ struct fs_node *temp;
+
+ tree_get_node(node);
+ list_for_each_entry_safe(iter, temp, &node->children, list)
+ clean_tree(iter);
+ tree_put_node(node);
+ tree_remove_node(node);
+ }
+}
+
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
+{
+ if (!root_ns)
+ return;
+
+ clean_tree(&root_ns->ns.node);
+}
+
+static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_egress_root_ns)
+ return;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ cleanup_root_ns(steering->esw_egress_root_ns[i]);
+
+ kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
+}
+
+static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_ingress_root_ns)
+ return;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+
+ kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
+}
+
+void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ cleanup_root_ns(steering->root_ns);
+ cleanup_egress_acls_root_ns(dev);
+ cleanup_ingress_acls_root_ns(dev);
+ cleanup_root_ns(steering->fdb_root_ns);
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
+ mlx5_cleanup_fc_stats(dev);
+ kmem_cache_destroy(steering->ftes_cache);
+ kmem_cache_destroy(steering->fgs_cache);
+ kfree(steering);
+}
+
+static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
+ if (!steering->sniffer_tx_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
+ if (IS_ERR(prio)) {
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ return PTR_ERR(prio);
+ }
+ return 0;
+}
+
+static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
+ if (!steering->sniffer_rx_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
+ if (IS_ERR(prio)) {
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ return PTR_ERR(prio);
+ }
+ return 0;
+}
+
+static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
+ if (!steering->fdb_root_ns)
+ return -ENOMEM;
+
+ prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2);
+ if (IS_ERR(prio))
+ goto out_err;
+
+ prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+ if (IS_ERR(prio))
+ goto out_err;
+
+ set_prio_attrs(steering->fdb_root_ns);
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ return PTR_ERR(prio);
+}
+
+static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+{
+ struct fs_prio *prio;
+
+ steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
+ if (!steering->esw_egress_root_ns[vport])
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+{
+ struct fs_prio *prio;
+
+ steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
+ if (!steering->esw_ingress_root_ns[vport])
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err;
+ int i;
+
+ steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
+ sizeof(*steering->esw_egress_root_ns),
+ GFP_KERNEL);
+ if (!steering->esw_egress_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ err = init_egress_acl_root_ns(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+
+ return 0;
+
+cleanup_root_ns:
+ for (i--; i >= 0; i--)
+ cleanup_root_ns(steering->esw_egress_root_ns[i]);
+ kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
+ return err;
+}
+
+static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err;
+ int i;
+
+ steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
+ sizeof(*steering->esw_ingress_root_ns),
+ GFP_KERNEL);
+ if (!steering->esw_ingress_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ err = init_ingress_acl_root_ns(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+
+ return 0;
+
+cleanup_root_ns:
+ for (i--; i >= 0; i--)
+ cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+ kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
+ return err;
+}
+
+static int init_egress_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->egress_root_ns = create_root_ns(steering,
+ FS_FT_NIC_TX);
+ if (!steering->egress_root_ns)
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+int mlx5_init_fs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering;
+ int err = 0;
+
+ err = mlx5_init_fc_stats(dev);
+ if (err)
+ return err;
+
+ steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+ if (!steering)
+ return -ENOMEM;
+ steering->dev = dev;
+ dev->priv.steering = steering;
+
+ steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+ sizeof(struct mlx5_flow_group), 0,
+ 0, NULL);
+ steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+ 0, NULL);
+ if (!steering->ftes_cache || !steering->fgs_cache) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ (MLX5_CAP_GEN(dev, nic_flow_table))) ||
+ ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
+ MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
+ err = init_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_ESWITCH_MANAGER(dev)) {
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
+ err = init_fdb_root_ns(steering);
+ if (err)
+ goto err;
+ }
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
+ err = init_egress_acls_root_ns(dev);
+ if (err)
+ goto err;
+ }
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
+ err = init_ingress_acls_root_ns(dev);
+ if (err)
+ goto err;
+ }
+ }
+
+ if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
+ err = init_sniffer_rx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
+ err = init_sniffer_tx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_IPSEC_DEV(dev)) {
+ err = init_egress_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ return 0;
+err:
+ mlx5_cleanup_fs(dev);
+ return err;
+}
+
+int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+ struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *new_uqp;
+ int err = 0;
+
+ new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
+ if (!new_uqp)
+ return -ENOMEM;
+
+ mutex_lock(&root->chain_lock);
+
+ if (!root->root_ft) {
+ err = -EINVAL;
+ goto update_ft_fail;
+ }
+
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ false);
+ if (err) {
+ mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
+ underlay_qpn, err);
+ goto update_ft_fail;
+ }
+
+ new_uqp->qpn = underlay_qpn;
+ list_add_tail(&new_uqp->list, &root->underlay_qpns);
+
+ mutex_unlock(&root->chain_lock);
+
+ return 0;
+
+update_ft_fail:
+ mutex_unlock(&root->chain_lock);
+ kfree(new_uqp);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
+
+int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+ struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *uqp;
+ bool found = false;
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ if (uqp->qpn == underlay_qpn) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
+ underlay_qpn);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ true);
+ if (err)
+ mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
+ underlay_qpn, err);
+
+ list_del(&uqp->list);
+ mutex_unlock(&root->chain_lock);
+ kfree(uqp);
+
+ return 0;
+
+out:
+ mutex_unlock(&root->chain_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
new file mode 100644
index 000000000..ba62fbce2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CORE_
+#define _MLX5_FS_CORE_
+
+#include <linux/refcount.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rhashtable.h>
+
+enum fs_node_type {
+ FS_TYPE_NAMESPACE,
+ FS_TYPE_PRIO,
+ FS_TYPE_FLOW_TABLE,
+ FS_TYPE_FLOW_GROUP,
+ FS_TYPE_FLOW_ENTRY,
+ FS_TYPE_FLOW_DEST
+};
+
+enum fs_flow_table_type {
+ FS_FT_NIC_RX = 0x0,
+ FS_FT_NIC_TX = 0x1,
+ FS_FT_ESW_EGRESS_ACL = 0x2,
+ FS_FT_ESW_INGRESS_ACL = 0x3,
+ FS_FT_FDB = 0X4,
+ FS_FT_SNIFFER_RX = 0X5,
+ FS_FT_SNIFFER_TX = 0X6,
+ FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX,
+};
+
+enum fs_flow_table_op_mod {
+ FS_FT_OP_MOD_NORMAL,
+ FS_FT_OP_MOD_LAG_DEMUX,
+};
+
+enum fs_fte_status {
+ FS_FTE_STATUS_EXISTING = 1UL << 0,
+};
+
+struct mlx5_flow_steering {
+ struct mlx5_core_dev *dev;
+ struct kmem_cache *fgs_cache;
+ struct kmem_cache *ftes_cache;
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_root_namespace *fdb_root_ns;
+ struct mlx5_flow_root_namespace **esw_egress_root_ns;
+ struct mlx5_flow_root_namespace **esw_ingress_root_ns;
+ struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
+ struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
+ struct mlx5_flow_root_namespace *egress_root_ns;
+};
+
+struct fs_node {
+ struct list_head list;
+ struct list_head children;
+ enum fs_node_type type;
+ struct fs_node *parent;
+ struct fs_node *root;
+ /* lock the node for writing and traversing */
+ struct rw_semaphore lock;
+ refcount_t refcount;
+ bool active;
+ void (*del_hw_func)(struct fs_node *);
+ void (*del_sw_func)(struct fs_node *);
+ atomic_t version;
+};
+
+struct mlx5_flow_rule {
+ struct fs_node node;
+ struct mlx5_flow_destination dest_attr;
+ /* next_ft should be accessed under chain_lock and only of
+ * destination type is FWD_NEXT_fT.
+ */
+ struct list_head next_ft;
+ u32 sw_action;
+};
+
+struct mlx5_flow_handle {
+ int num_rules;
+ struct mlx5_flow_rule *rule[];
+};
+
+/* Type of children is mlx5_flow_group */
+struct mlx5_flow_table {
+ struct fs_node node;
+ u32 id;
+ u16 vport;
+ unsigned int max_fte;
+ unsigned int level;
+ enum fs_flow_table_type type;
+ enum fs_flow_table_op_mod op_mod;
+ struct {
+ bool active;
+ unsigned int required_groups;
+ unsigned int group_size;
+ unsigned int num_groups;
+ } autogroup;
+ /* Protect fwd_rules */
+ struct mutex lock;
+ /* FWD rules that point on this flow table */
+ struct list_head fwd_rules;
+ u32 flags;
+ struct rhltable fgs_hash;
+};
+
+struct mlx5_fc_cache {
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc {
+ struct rb_node node;
+ struct list_head list;
+
+ /* last{packets,bytes} members are used when calculating the delta since
+ * last reading
+ */
+ u64 lastpackets;
+ u64 lastbytes;
+
+ u32 id;
+ bool deleted;
+ bool aging;
+
+ struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
+struct mlx5_ft_underlay_qp {
+ struct list_head list;
+ u32 qpn;
+};
+
+#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_800
+/* Calculate the fte_match_param length and without the reserved length.
+ * Make sure the reserved field is the last.
+ */
+#define MLX5_ST_SZ_DW_MATCH_PARAM \
+ ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \
+ BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \
+ MLX5_FLD_SZ_BYTES(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED) +\
+ MLX5_BYTE_OFF(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED)))
+
+/* Type of children is mlx5_flow_rule */
+struct fs_fte {
+ struct fs_node node;
+ u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
+ u32 dests_size;
+ u32 index;
+ struct mlx5_flow_act action;
+ enum fs_fte_status status;
+ struct mlx5_fc *counter;
+ struct rhash_head hash;
+};
+
+/* Type of children is mlx5_flow_table/namespace */
+struct fs_prio {
+ struct fs_node node;
+ unsigned int num_levels;
+ unsigned int start_level;
+ unsigned int prio;
+ unsigned int num_ft;
+};
+
+/* Type of children is fs_prio */
+struct mlx5_flow_namespace {
+ /* parent == NULL => root ns */
+ struct fs_node node;
+};
+
+struct mlx5_flow_group_mask {
+ u8 match_criteria_enable;
+ u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM];
+};
+
+/* Type of children is fs_fte */
+struct mlx5_flow_group {
+ struct fs_node node;
+ struct mlx5_flow_group_mask mask;
+ u32 start_index;
+ u32 max_ftes;
+ struct ida fte_allocator;
+ u32 id;
+ struct rhashtable ftes_hash;
+ struct rhlist_head hash;
+};
+
+struct mlx5_flow_root_namespace {
+ struct mlx5_flow_namespace ns;
+ enum fs_flow_table_type table_type;
+ struct mlx5_core_dev *dev;
+ struct mlx5_flow_table *root_ft;
+ /* Should be held when chaining flow tables */
+ struct mutex chain_lock;
+ struct list_head underlay_qpns;
+ const struct mlx5_flow_cmds *cmds;
+};
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+ struct delayed_work *dwork,
+ unsigned long delay);
+void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
+ unsigned long interval);
+
+int mlx5_init_fs(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+
+#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
+
+#define fs_list_for_each_entry(pos, root) \
+ list_for_each_entry(pos, root, node.list)
+
+#define fs_list_for_each_entry_safe(pos, tmp, root) \
+ list_for_each_entry_safe(pos, tmp, root, node.list)
+
+#define fs_for_each_ns_or_ft_reverse(pos, prio) \
+ list_for_each_entry_reverse(pos, &(prio)->node.children, list)
+
+#define fs_for_each_ns_or_ft(pos, prio) \
+ list_for_each_entry(pos, (&(prio)->node.children), list)
+
+#define fs_for_each_prio(pos, ns) \
+ fs_list_for_each_entry(pos, &(ns)->node.children)
+
+#define fs_for_each_ns(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
+#define fs_for_each_ft(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
+#define fs_for_each_ft_safe(pos, tmp, prio) \
+ fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children)
+
+#define fs_for_each_fg(pos, ft) \
+ fs_list_for_each_entry(pos, &(ft)->node.children)
+
+#define fs_for_each_fte(pos, fg) \
+ fs_list_for_each_entry(pos, &(fg)->node.children)
+
+#define fs_for_each_dst(pos, fte) \
+ fs_list_for_each_entry(pos, &(fte)->node.children)
+
+#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \
+ (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \
+ (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \
+ (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \
+ (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \
+ (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \
+ (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \
+ (BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\
+ )
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
new file mode 100644
index 000000000..808ddd732
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+
+#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+/* Max number of counters to query in bulk read is 32K */
+#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+
+/* locking scheme:
+ *
+ * It is the responsibility of the user to prevent concurrent calls or bad
+ * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference
+ * to struct mlx5_fc.
+ * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a
+ * dump (access to struct mlx5_fc) after a counter is destroyed.
+ *
+ * access to counter list:
+ * - create (user context)
+ * - mlx5_fc_create() only adds to an addlist to be used by
+ * mlx5_fc_stats_query_work(). addlist is protected by a spinlock.
+ * - spawn thread to do the actual destroy
+ *
+ * - destroy (user context)
+ * - mark a counter as deleted
+ * - spawn thread to do the actual del
+ *
+ * - dump (user context)
+ * user should not call dump after destroy
+ *
+ * - query (single thread workqueue context)
+ * destroy/dump - no conflict (see destroy)
+ * query/dump - packets and bytes might be inconsistent (since update is not
+ * atomic)
+ * query/create - no conflict (see create)
+ * since every create/destroy spawn the work, only after necessary time has
+ * elapsed, the thread will actually query the hardware.
+ */
+
+static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+{
+ struct rb_node **new = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*new) {
+ struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node);
+ int result = counter->id - this->id;
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&counter->node, parent, new);
+ rb_insert_color(&counter->node, root);
+}
+
+/* The function returns the last node that was queried so the caller
+ * function can continue calling it till all counters are queried.
+ */
+static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+ struct mlx5_fc *first,
+ u32 last_id)
+{
+ struct mlx5_cmd_fc_bulk *b;
+ struct rb_node *node = NULL;
+ u32 afirst_id;
+ int num;
+ int err;
+
+ int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+
+ /* first id must be aligned to 4 when using bulk query */
+ afirst_id = first->id & ~0x3;
+
+ /* number of counters to query inc. the last counter */
+ num = ALIGN(last_id - afirst_id + 1, 4);
+ if (num > max_bulk) {
+ num = max_bulk;
+ last_id = afirst_id + num - 1;
+ }
+
+ b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
+ if (!b) {
+ mlx5_core_err(dev, "Error allocating resources for bulk query\n");
+ return NULL;
+ }
+
+ err = mlx5_cmd_fc_bulk_query(dev, b);
+ if (err) {
+ mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+ goto out;
+ }
+
+ for (node = &first->node; node; node = rb_next(node)) {
+ struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
+ struct mlx5_fc_cache *c = &counter->cache;
+ u64 packets;
+ u64 bytes;
+
+ if (counter->id > last_id)
+ break;
+
+ mlx5_cmd_fc_bulk_get(dev, b,
+ counter->id, &packets, &bytes);
+
+ if (c->packets == packets)
+ continue;
+
+ c->packets = packets;
+ c->bytes = bytes;
+ c->lastuse = jiffies;
+ }
+
+out:
+ mlx5_cmd_fc_bulk_free(b);
+
+ return node;
+}
+
+static void mlx5_fc_stats_work(struct work_struct *work)
+{
+ struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
+ priv.fc_stats.work.work);
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ unsigned long now = jiffies;
+ struct mlx5_fc *counter = NULL;
+ struct mlx5_fc *last = NULL;
+ struct rb_node *node;
+ LIST_HEAD(tmplist);
+
+ spin_lock(&fc_stats->addlist_lock);
+
+ list_splice_tail_init(&fc_stats->addlist, &tmplist);
+
+ if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
+ queue_delayed_work(fc_stats->wq, &fc_stats->work,
+ fc_stats->sampling_interval);
+
+ spin_unlock(&fc_stats->addlist_lock);
+
+ list_for_each_entry(counter, &tmplist, list)
+ mlx5_fc_stats_insert(&fc_stats->counters, counter);
+
+ node = rb_first(&fc_stats->counters);
+ while (node) {
+ counter = rb_entry(node, struct mlx5_fc, node);
+
+ node = rb_next(node);
+
+ if (counter->deleted) {
+ rb_erase(&counter->node, &fc_stats->counters);
+
+ mlx5_cmd_fc_free(dev, counter->id);
+
+ kfree(counter);
+ continue;
+ }
+
+ last = counter;
+ }
+
+ if (time_before(now, fc_stats->next_query) || !last)
+ return;
+
+ node = rb_first(&fc_stats->counters);
+ while (node) {
+ counter = rb_entry(node, struct mlx5_fc, node);
+
+ node = mlx5_fc_stats_query(dev, counter, last->id);
+ }
+
+ fc_stats->next_query = now + fc_stats->sampling_interval;
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter;
+ int err;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_cmd_fc_alloc(dev, &counter->id);
+ if (err)
+ goto err_out;
+
+ if (aging) {
+ counter->cache.lastuse = jiffies;
+ counter->aging = true;
+
+ spin_lock(&fc_stats->addlist_lock);
+ list_add(&counter->list, &fc_stats->addlist);
+ spin_unlock(&fc_stats->addlist_lock);
+
+ mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+ }
+
+ return counter;
+
+err_out:
+ kfree(counter);
+
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(mlx5_fc_create);
+
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ if (!counter)
+ return;
+
+ if (counter->aging) {
+ counter->deleted = true;
+ mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+ return;
+ }
+
+ mlx5_cmd_fc_free(dev, counter->id);
+ kfree(counter);
+}
+EXPORT_SYMBOL(mlx5_fc_destroy);
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ fc_stats->counters = RB_ROOT;
+ INIT_LIST_HEAD(&fc_stats->addlist);
+ spin_lock_init(&fc_stats->addlist_lock);
+
+ fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
+ if (!fc_stats->wq)
+ return -ENOMEM;
+
+ fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
+ INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
+
+ return 0;
+}
+
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter;
+ struct mlx5_fc *tmp;
+ struct rb_node *node;
+
+ cancel_delayed_work_sync(&dev->priv.fc_stats.work);
+ destroy_workqueue(dev->priv.fc_stats.wq);
+ dev->priv.fc_stats.wq = NULL;
+
+ list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
+ list_del(&counter->list);
+
+ mlx5_cmd_fc_free(dev, counter->id);
+
+ kfree(counter);
+ }
+
+ node = rb_first(&fc_stats->counters);
+ while (node) {
+ counter = rb_entry(node, struct mlx5_fc, node);
+
+ node = rb_next(node);
+
+ rb_erase(&counter->node, &fc_stats->counters);
+
+ mlx5_cmd_fc_free(dev, counter->id);
+
+ kfree(counter);
+ }
+}
+
+int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
+ u64 *packets, u64 *bytes)
+{
+ return mlx5_cmd_fc_query(dev, counter->id, packets, bytes);
+}
+EXPORT_SYMBOL(mlx5_fc_query);
+
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+{
+ return counter->cache.lastuse;
+}
+
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+ u64 *bytes, u64 *packets, u64 *lastuse)
+{
+ struct mlx5_fc_cache c;
+
+ c = counter->cache;
+
+ *bytes = c.bytes - counter->lastbytes;
+ *packets = c.packets - counter->lastpackets;
+ *lastuse = c.lastuse;
+
+ counter->lastbytes = c.bytes;
+ counter->lastpackets = c.packets;
+}
+
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ queue_delayed_work(fc_stats->wq, dwork, delay);
+}
+
+void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
+ unsigned long interval)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ fc_stats->sampling_interval = min_t(unsigned long, interval,
+ fc_stats->sampling_interval);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
new file mode 100644
index 000000000..41ad24f0d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/module.h>
+#include "mlx5_core.h"
+#include "../../mlxfw/mlxfw.h"
+
+static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0};
+
+ MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_query_board_id(struct mlx5_core_dev *dev)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_cmd_query_adapter(dev, out, outlen);
+ if (err)
+ goto out;
+
+ memcpy(dev->board_id,
+ MLX5_ADDR_OF(query_adapter_out, out,
+ query_adapter_struct.vsd_contd_psid),
+ MLX5_FLD_SZ_BYTES(query_adapter_out,
+ query_adapter_struct.vsd_contd_psid));
+
+out:
+ kfree(out);
+ return err;
+}
+
+int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_cmd_query_adapter(mdev, out, outlen);
+ if (err)
+ goto out;
+
+ *vendor_id = MLX5_GET(query_adapter_out, out,
+ query_adapter_struct.ieee_vendor_id);
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_query_vendor_id);
+
+static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_pcam_reg(dev, dev->caps.pcam,
+ MLX5_PCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_PCAM_REGS_5000_TO_507F);
+}
+
+static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_mcam_reg(dev, dev->caps.mcam,
+ MLX5_MCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_MCAM_REGS_FIRST_128);
+}
+
+static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_qcam_reg(dev, dev->caps.qcam,
+ MLX5_QCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_QCAM_REGS_FIRST_128);
+}
+
+int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, atomic)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, roce)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, nic_flow_table) ||
+ MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vport_group_manager) &&
+ MLX5_ESWITCH_MANAGER(dev)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_ESWITCH_MANAGER(dev)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vector_calc)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, qos)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_QOS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, debug))
+ mlx5_core_get_caps(dev, MLX5_CAP_DEBUG);
+
+ if (MLX5_CAP_GEN(dev, pcam_reg))
+ mlx5_get_pcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, mcam_reg))
+ mlx5_get_mcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, qcam_reg))
+ mlx5_get_qcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, device_memory)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
+{
+ u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0};
+ int i;
+
+ MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+
+ if (MLX5_CAP_GEN(dev, sw_owner_id)) {
+ for (i = 0; i < 4; i++)
+ MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i,
+ sw_owner_id[i]);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+ int force_state;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev, force_teardown)) {
+ mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE);
+
+ ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ force_state = MLX5_GET(teardown_hca_out, out, force_state);
+ if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+ mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+enum mlxsw_reg_mcc_instruction {
+ MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01,
+ MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02,
+ MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03,
+ MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04,
+ MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06,
+ MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08,
+};
+
+static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev,
+ enum mlxsw_reg_mcc_instruction instr,
+ u16 component_index, u32 update_handle,
+ u32 component_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mcc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcc_reg)];
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(mcc_reg, in, instruction, instr);
+ MLX5_SET(mcc_reg, in, component_index, component_index);
+ MLX5_SET(mcc_reg, in, update_handle, update_handle);
+ MLX5_SET(mcc_reg, in, component_size, component_size);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCC, 0, 1);
+}
+
+static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev,
+ u32 *update_handle, u8 *error_code,
+ u8 *control_state)
+{
+ u32 out[MLX5_ST_SZ_DW(mcc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcc_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+ MLX5_SET(mcc_reg, in, update_handle, *update_handle);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCC, 0, 0);
+ if (err)
+ goto out;
+
+ *update_handle = MLX5_GET(mcc_reg, out, update_handle);
+ *error_code = MLX5_GET(mcc_reg, out, error_code);
+ *control_state = MLX5_GET(mcc_reg, out, control_state);
+
+out:
+ return err;
+}
+
+static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev,
+ u32 update_handle,
+ u32 offset, u16 size,
+ u8 *data)
+{
+ int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size;
+ u32 out[MLX5_ST_SZ_DW(mcda_reg)];
+ int i, j, dw_size = size >> 2;
+ __be32 data_element;
+ u32 *in;
+
+ in = kzalloc(in_size, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(mcda_reg, in, update_handle, update_handle);
+ MLX5_SET(mcda_reg, in, offset, offset);
+ MLX5_SET(mcda_reg, in, size, size);
+
+ for (i = 0; i < dw_size; i++) {
+ j = i * 4;
+ data_element = htonl(*(u32 *)&data[j]);
+ memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4);
+ }
+
+ err = mlx5_core_access_reg(dev, in, in_size, out,
+ sizeof(out), MLX5_REG_MCDA, 0, 1);
+ kfree(in);
+ return err;
+}
+
+static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev,
+ u16 component_index,
+ u32 *max_component_size,
+ u8 *log_mcda_word_size,
+ u16 *mcda_max_write_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)];
+ int offset = MLX5_ST_SZ_DW(mcqi_reg);
+ u32 in[MLX5_ST_SZ_DW(mcqi_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(mcqi_reg, in, component_index, component_index);
+ MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap));
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCQI, 0, 0);
+ if (err)
+ goto out;
+
+ *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size);
+ *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size);
+ *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size);
+
+out:
+ return err;
+}
+
+struct mlx5_mlxfw_dev {
+ struct mlxfw_dev mlxfw_dev;
+ struct mlx5_core_dev *mlx5_core_dev;
+};
+
+static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev,
+ u16 component_index, u32 *p_max_size,
+ u8 *p_align_bits, u16 *p_max_write_size)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcqi_query(dev, component_index, p_max_size,
+ p_align_bits, p_max_write_size);
+}
+
+static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u8 control_state, error_code;
+ int err;
+
+ *fwhandle = 0;
+ err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state);
+ if (err)
+ return err;
+
+ if (control_state != MLXFW_FSM_STATE_IDLE)
+ return -EBUSY;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE,
+ 0, *fwhandle, 0);
+}
+
+static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index, u32 component_size)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT,
+ component_index, fwhandle, component_size);
+}
+
+static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u8 *data, u16 size, u32 offset)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data);
+}
+
+static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT,
+ component_index, fwhandle, 0);
+}
+
+static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0,
+ fwhandle, 0);
+}
+
+static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ enum mlxfw_fsm_state *fsm_state,
+ enum mlxfw_fsm_state_err *fsm_state_err)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u8 control_state, error_code;
+ int err;
+
+ err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state);
+ if (err)
+ return err;
+
+ *fsm_state = control_state;
+ *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code,
+ MLXFW_FSM_STATE_ERR_MAX);
+ return 0;
+}
+
+static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0);
+}
+
+static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0,
+ fwhandle, 0);
+}
+
+static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = {
+ .component_query = mlx5_component_query,
+ .fsm_lock = mlx5_fsm_lock,
+ .fsm_component_update = mlx5_fsm_component_update,
+ .fsm_block_download = mlx5_fsm_block_download,
+ .fsm_component_verify = mlx5_fsm_component_verify,
+ .fsm_activate = mlx5_fsm_activate,
+ .fsm_query_state = mlx5_fsm_query_state,
+ .fsm_cancel = mlx5_fsm_cancel,
+ .fsm_release = mlx5_fsm_release
+};
+
+int mlx5_firmware_flash(struct mlx5_core_dev *dev,
+ const struct firmware *firmware)
+{
+ struct mlx5_mlxfw_dev mlx5_mlxfw_dev = {
+ .mlxfw_dev = {
+ .ops = &mlx5_mlxfw_dev_ops,
+ .psid = dev->board_id,
+ .psid_size = strlen(dev->board_id),
+ },
+ .mlx5_core_dev = dev
+ };
+
+ if (!MLX5_CAP_GEN(dev, mcam_reg) ||
+ !MLX5_CAP_MCAM_REG(dev, mcqi) ||
+ !MLX5_CAP_MCAM_REG(dev, mcc) ||
+ !MLX5_CAP_MCAM_REG(dev, mcda)) {
+ pr_info("%s flashing isn't supported by the running FW\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
new file mode 100644
index 000000000..9f39aeca8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+ MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
+ MAX_MISSES = 3,
+};
+
+enum {
+ MLX5_HEALTH_SYNDR_FW_ERR = 0x1,
+ MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7,
+ MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8,
+ MLX5_HEALTH_SYNDR_CRC_ERR = 0x9,
+ MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa,
+ MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb,
+ MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc,
+ MLX5_HEALTH_SYNDR_EQ_ERR = 0xd,
+ MLX5_HEALTH_SYNDR_EQ_INV = 0xe,
+ MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf,
+ MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
+};
+
+enum {
+ MLX5_NIC_IFC_FULL = 0,
+ MLX5_NIC_IFC_DISABLED = 1,
+ MLX5_NIC_IFC_NO_DRAM_NIC = 2,
+ MLX5_NIC_IFC_INVALID = 3
+};
+
+enum {
+ MLX5_DROP_NEW_HEALTH_WORK,
+ MLX5_DROP_NEW_RECOVERY_WORK,
+};
+
+static u8 get_nic_state(struct mlx5_core_dev *dev)
+{
+ return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+}
+
+static void trigger_cmd_completions(struct mlx5_core_dev *dev)
+{
+ unsigned long flags;
+ u64 vector;
+
+ /* wait for pending handlers to complete */
+ synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector, true);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+static int in_fatal(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+
+ if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ return 1;
+
+ if (ioread32be(&h->fw_ver) == 0xffffffff)
+ return 1;
+
+ return 0;
+}
+
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
+{
+ mutex_lock(&dev->intf_state_mutex);
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto unlock;
+
+ mlx5_core_err(dev, "start\n");
+ if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ trigger_cmd_completions(dev);
+ }
+
+ mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+ mlx5_core_err(dev, "end\n");
+
+unlock:
+ mutex_unlock(&dev->intf_state_mutex);
+}
+
+static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
+{
+ u8 nic_interface = get_nic_state(dev);
+
+ switch (nic_interface) {
+ case MLX5_NIC_IFC_FULL:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
+ break;
+
+ case MLX5_NIC_IFC_DISABLED:
+ mlx5_core_warn(dev, "starting teardown\n");
+ break;
+
+ case MLX5_NIC_IFC_NO_DRAM_NIC:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
+ break;
+ default:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
+ nic_interface);
+ }
+
+ mlx5_disable_device(dev);
+}
+
+static void health_recover(struct work_struct *work)
+{
+ struct mlx5_core_health *health;
+ struct delayed_work *dwork;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ u8 nic_state;
+
+ dwork = container_of(work, struct delayed_work, work);
+ health = container_of(dwork, struct mlx5_core_health, recover_work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ nic_state = get_nic_state(dev);
+ if (nic_state == MLX5_NIC_IFC_INVALID) {
+ dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
+ return;
+ }
+
+ dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+ mlx5_recover_device(dev);
+}
+
+/* How much time to wait until health resetting the driver (in msecs) */
+#define MLX5_RECOVERY_DELAY_MSECS 60000
+static void health_care(struct work_struct *work)
+{
+ unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS);
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ unsigned long flags;
+
+ health = container_of(work, struct mlx5_core_health, work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ mlx5_core_warn(dev, "handling bad device here\n");
+ mlx5_handle_bad_state(dev);
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
+ schedule_delayed_work(&health->recover_work, recover_delay);
+ else
+ dev_err(&dev->pdev->dev,
+ "new health works are not permitted at this stage\n");
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+}
+
+static const char *hsynd_str(u8 synd)
+{
+ switch (synd) {
+ case MLX5_HEALTH_SYNDR_FW_ERR:
+ return "firmware internal error";
+ case MLX5_HEALTH_SYNDR_IRISC_ERR:
+ return "irisc not responding";
+ case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
+ return "unrecoverable hardware error";
+ case MLX5_HEALTH_SYNDR_CRC_ERR:
+ return "firmware CRC error";
+ case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
+ return "ICM fetch PCI error";
+ case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
+ return "HW fatal error\n";
+ case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
+ return "async EQ buffer overrun";
+ case MLX5_HEALTH_SYNDR_EQ_ERR:
+ return "EQ error";
+ case MLX5_HEALTH_SYNDR_EQ_INV:
+ return "Invalid EQ referenced";
+ case MLX5_HEALTH_SYNDR_FFSER_ERR:
+ return "FFSER error";
+ case MLX5_HEALTH_SYNDR_HIGH_TEMP:
+ return "High temperature";
+ default:
+ return "unrecognized error";
+ }
+}
+
+static void print_health_info(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ char fw_str[18];
+ u32 fw;
+ int i;
+
+ /* If the syndrome is 0, the device is OK and no need to print buffer */
+ if (!ioread8(&h->synd))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
+ dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
+
+ dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
+ dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+ sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
+ dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str);
+ dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+ dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index));
+ dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
+ dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+ fw = ioread32be(&h->fw_ver);
+ dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw);
+}
+
+static unsigned long get_next_poll_jiffies(void)
+{
+ unsigned long next;
+
+ get_random_bytes(&next, sizeof(next));
+ next %= HZ;
+ next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+
+ return next;
+}
+
+void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+ queue_work(health->wq, &health->work);
+ else
+ dev_err(&dev->pdev->dev,
+ "new health works are not permitted at this stage\n");
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+}
+
+static void poll_health(struct timer_list *t)
+{
+ struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
+ struct mlx5_core_health *health = &dev->priv.health;
+ u32 count;
+
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
+ count = ioread32be(health->health_counter);
+ if (count == health->prev)
+ ++health->miss_counter;
+ else
+ health->miss_counter = 0;
+
+ health->prev = count;
+ if (health->miss_counter == MAX_MISSES) {
+ dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
+ print_health_info(dev);
+ }
+
+ if (in_fatal(dev) && !health->sick) {
+ health->sick = true;
+ print_health_info(dev);
+ mlx5_trigger_health_work(dev);
+ }
+
+out:
+ mod_timer(&health->timer, get_next_poll_jiffies());
+}
+
+void mlx5_start_health_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ timer_setup(&health->timer, poll_health, 0);
+ health->sick = 0;
+ clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ health->health = &dev->iseg->health;
+ health->health_counter = &dev->iseg->health_counter;
+
+ health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
+ add_timer(&health->timer);
+}
+
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ if (disable_health) {
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ }
+
+ del_timer_sync(&health->timer);
+}
+
+void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ cancel_delayed_work_sync(&health->recover_work);
+ cancel_work_sync(&health->work);
+}
+
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ cancel_delayed_work_sync(&dev->priv.health.recover_work);
+}
+
+void mlx5_health_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ destroy_workqueue(health->wq);
+}
+
+int mlx5_health_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health;
+ char *name;
+
+ health = &dev->priv.health;
+ name = kmalloc(64, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ strcpy(name, "mlx5_health");
+ strcat(name, dev_name(&dev->pdev->dev));
+ health->wq = create_singlethread_workqueue(name);
+ kfree(name);
+ if (!health->wq)
+ return -ENOMEM;
+ spin_lock_init(&health->wq_lock);
+ INIT_WORK(&health->work, health_care);
+ INIT_DELAYED_WORK(&health->recover_work, health_recover);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
new file mode 100644
index 000000000..90cb50fe1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "ipoib.h"
+
+static void mlx5i_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_drvinfo(priv, drvinfo);
+ strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]",
+ sizeof(drvinfo->driver));
+}
+
+static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_strings(priv, stringset, data);
+}
+
+static int mlx5i_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_get_sset_count(priv, sset);
+}
+
+static void mlx5i_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
+}
+
+static int mlx5i_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+static void mlx5i_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param);
+}
+
+static int mlx5i_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_set_channels(priv, ch);
+}
+
+static void mlx5i_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+static int mlx5i_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
+static int mlx5i_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+static int mlx5i_get_ts_info(struct net_device *netdev,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_get_ts_info(priv, info);
+}
+
+static int mlx5i_flash_device(struct net_device *netdev,
+ struct ethtool_flash *flash)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_flash_device(priv, flash);
+}
+
+enum mlx5_ptys_width {
+ MLX5_PTYS_WIDTH_1X = 1 << 0,
+ MLX5_PTYS_WIDTH_2X = 1 << 1,
+ MLX5_PTYS_WIDTH_4X = 1 << 2,
+ MLX5_PTYS_WIDTH_8X = 1 << 3,
+ MLX5_PTYS_WIDTH_12X = 1 << 4,
+};
+
+static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
+{
+ switch (width) {
+ case MLX5_PTYS_WIDTH_1X: return 1;
+ case MLX5_PTYS_WIDTH_2X: return 2;
+ case MLX5_PTYS_WIDTH_4X: return 4;
+ case MLX5_PTYS_WIDTH_8X: return 8;
+ case MLX5_PTYS_WIDTH_12X: return 12;
+ default: return -1;
+ }
+}
+
+enum mlx5_ptys_rate {
+ MLX5_PTYS_RATE_SDR = 1 << 0,
+ MLX5_PTYS_RATE_DDR = 1 << 1,
+ MLX5_PTYS_RATE_QDR = 1 << 2,
+ MLX5_PTYS_RATE_FDR10 = 1 << 3,
+ MLX5_PTYS_RATE_FDR = 1 << 4,
+ MLX5_PTYS_RATE_EDR = 1 << 5,
+ MLX5_PTYS_RATE_HDR = 1 << 6,
+};
+
+static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
+{
+ switch (rate) {
+ case MLX5_PTYS_RATE_SDR: return 2500;
+ case MLX5_PTYS_RATE_DDR: return 5000;
+ case MLX5_PTYS_RATE_QDR:
+ case MLX5_PTYS_RATE_FDR10: return 10000;
+ case MLX5_PTYS_RATE_FDR: return 14000;
+ case MLX5_PTYS_RATE_EDR: return 25000;
+ case MLX5_PTYS_RATE_HDR: return 50000;
+ default: return -1;
+ }
+}
+
+static int mlx5i_get_port_settings(struct net_device *netdev,
+ u16 *ib_link_width_oper, u16 *ib_proto_oper)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+ int ret;
+
+ ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
+ if (ret)
+ return ret;
+
+ *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
+ *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+
+ return 0;
+}
+
+static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
+{
+ int rate, width;
+
+ rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper);
+ if (rate < 0)
+ return -EINVAL;
+ width = mlx5_ptys_width_enum_to_int(ib_link_width_oper);
+ if (width < 0)
+ return -EINVAL;
+
+ return rate * width;
+}
+
+static int mlx5i_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ u16 ib_link_width_oper;
+ u16 ib_proto_oper;
+ int speed, ret;
+
+ ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+ if (ret)
+ return ret;
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper);
+ if (speed < 0)
+ return -EINVAL;
+
+ link_ksettings->base.duplex = DUPLEX_FULL;
+ link_ksettings->base.port = PORT_OTHER;
+
+ link_ksettings->base.autoneg = AUTONEG_DISABLE;
+
+ link_ksettings->base.speed = speed;
+
+ return 0;
+}
+
+const struct ethtool_ops mlx5i_ethtool_ops = {
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_strings = mlx5i_get_strings,
+ .get_sset_count = mlx5i_get_sset_count,
+ .get_ethtool_stats = mlx5i_get_ethtool_stats,
+ .get_ringparam = mlx5i_get_ringparam,
+ .set_ringparam = mlx5i_set_ringparam,
+ .flash_device = mlx5i_flash_device,
+ .get_channels = mlx5i_get_channels,
+ .set_channels = mlx5i_set_channels,
+ .get_coalesce = mlx5i_get_coalesce,
+ .set_coalesce = mlx5i_set_coalesce,
+ .get_ts_info = mlx5i_get_ts_info,
+ .get_link_ksettings = mlx5i_get_link_ksettings,
+ .get_link = ethtool_op_get_link,
+};
+
+const struct ethtool_ops mlx5i_pkey_ethtool_ops = {
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ts_info = mlx5i_get_ts_info,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
new file mode 100644
index 000000000..db6aafcce
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -0,0 +1,697 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/fs.h>
+#include "en.h"
+#include "ipoib.h"
+
+#define IB_DEFAULT_Q_KEY 0xb1b
+#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
+
+static int mlx5i_open(struct net_device *netdev);
+static int mlx5i_close(struct net_device *netdev);
+static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu);
+
+static const struct net_device_ops mlx5i_netdev_ops = {
+ .ndo_open = mlx5i_open,
+ .ndo_stop = mlx5i_close,
+ .ndo_init = mlx5i_dev_init,
+ .ndo_uninit = mlx5i_dev_cleanup,
+ .ndo_change_mtu = mlx5i_change_mtu,
+ .ndo_do_ioctl = mlx5i_ioctl,
+};
+
+/* IPoIB mlx5 netdev profile */
+static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+
+ /* RQ size in ipoib by default is 512 */
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ params->lro_en = false;
+ params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+void mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ u16 max_mtu;
+
+ /* priv init */
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+ mutex_init(&priv->state_lock);
+
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->mtu = max_mtu;
+
+ mlx5e_build_nic_params(mdev, &priv->channels.params,
+ profile->max_nch(mdev), netdev->mtu);
+ mlx5i_build_nic_params(mdev, &priv->channels.params);
+
+ mlx5e_timestamp_init(priv);
+
+ /* netdev init */
+ netdev->hw_features |= NETIF_F_SG;
+ netdev->hw_features |= NETIF_F_IP_CSUM;
+ netdev->hw_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_features |= NETIF_F_GRO;
+ netdev->hw_features |= NETIF_F_TSO;
+ netdev->hw_features |= NETIF_F_TSO6;
+ netdev->hw_features |= NETIF_F_RXCSUM;
+ netdev->hw_features |= NETIF_F_RXHASH;
+
+ netdev->netdev_ops = &mlx5i_netdev_ops;
+ netdev->ethtool_ops = &mlx5i_ethtool_ops;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_cleanup(struct mlx5e_priv *priv)
+{
+ /* Do nothing .. */
+}
+
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_qp *qp = &ipriv->qp;
+ struct mlx5_qp_context *context;
+ int ret;
+
+ /* QP states */
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ context->pri_path.port = 1;
+ context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
+ context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+ memset(context, 0, sizeof(*context));
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+
+ kfree(context);
+ return 0;
+
+err_qp_modify_to_err:
+ mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp);
+ kfree(context);
+ return ret;
+}
+
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_qp_context context;
+ int err;
+
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context,
+ &ipriv->qp);
+ if (err)
+ mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err);
+}
+
+#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
+
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+{
+ u32 *in = NULL;
+ void *addr_path;
+ int ret = 0;
+ int inlen;
+ void *qpc;
+
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, ulp_stateless_offload_mode,
+ MLX5_QP_ENHANCED_ULP_STATELESS_MODE);
+
+ addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+ MLX5_SET(ads, addr_path, vhca_port_num, 1);
+ MLX5_SET(ads, addr_path, grh, 1);
+
+ ret = mlx5_core_create_qp(mdev, qp, in, inlen);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret);
+ goto out;
+ }
+
+out:
+ kvfree(in);
+ return ret;
+}
+
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+{
+ mlx5_core_destroy_qp(mdev, qp);
+}
+
+static int mlx5i_init_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
+ goto err_destroy_underlay_qp;
+ }
+
+ return 0;
+
+err_destroy_underlay_qp:
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+ return err;
+}
+
+static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[0]);
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+}
+
+static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
+{
+ struct ttc_params ttc_params = {};
+ int tt, err;
+
+ priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+
+ if (!priv->fs.ns)
+ return -EINVAL;
+
+ err = mlx5e_arfs_create_tables(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
+ err);
+ priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ }
+
+ mlx5e_set_ttc_basic_params(priv, &ttc_params);
+ mlx5e_set_inner_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
+
+ err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+ err);
+ goto err_destroy_arfs_tables;
+ }
+
+ mlx5e_set_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
+
+ err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
+ err);
+ goto err_destroy_inner_ttc_table;
+ }
+
+ return 0;
+
+err_destroy_inner_ttc_table:
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+err_destroy_arfs_tables:
+ mlx5e_arfs_destroy_tables(priv);
+
+ return err;
+}
+
+static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_arfs_destroy_tables(priv);
+}
+
+static int mlx5i_init_rx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_indirect_rqt(priv);
+ if (err)
+ return err;
+
+ err = mlx5e_create_direct_rqts(priv);
+ if (err)
+ goto err_destroy_indirect_rqts;
+
+ err = mlx5e_create_indirect_tirs(priv);
+ if (err)
+ goto err_destroy_direct_rqts;
+
+ err = mlx5e_create_direct_tirs(priv);
+ if (err)
+ goto err_destroy_indirect_tirs;
+
+ err = mlx5i_create_flow_steering(priv);
+ if (err)
+ goto err_destroy_direct_tirs;
+
+ return 0;
+
+err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+err_destroy_indirect_tirs:
+ mlx5e_destroy_indirect_tirs(priv);
+err_destroy_direct_rqts:
+ mlx5e_destroy_direct_rqts(priv);
+err_destroy_indirect_rqts:
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ return err;
+}
+
+static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
+{
+ mlx5i_destroy_flow_steering(priv);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+}
+
+static const struct mlx5e_profile mlx5i_nic_profile = {
+ .init = mlx5i_init,
+ .cleanup = mlx5i_cleanup,
+ .init_tx = mlx5i_init_tx,
+ .cleanup_tx = mlx5i_cleanup_tx,
+ .init_rx = mlx5i_init_rx,
+ .cleanup_rx = mlx5i_cleanup_rx,
+ .enable = NULL, /* mlx5i_enable */
+ .disable = NULL, /* mlx5i_disable */
+ .update_stats = NULL, /* mlx5i_update_stats */
+ .max_nch = mlx5e_get_max_num_channels,
+ .update_carrier = NULL, /* no HW update in IB link */
+ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+ .max_tc = MLX5I_MAX_NUM_TC,
+};
+
+/* mlx5i netdev NDos */
+
+static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5e_channels new_channels = {};
+ struct mlx5e_params *params;
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ params = &priv->channels.params;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ params->sw_mtu = new_mtu;
+ netdev->mtu = params->sw_mtu;
+ goto out;
+ }
+
+ new_channels.params = *params;
+ new_channels.params.sw_mtu = new_mtu;
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ netdev->mtu = new_channels.params.sw_mtu;
+
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+int mlx5i_dev_init(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ /* Set dev address using underlay QP */
+ dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff;
+ dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff;
+ dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
+
+ /* Add QPN to net-device mapping to HT */
+ mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn);
+
+ return 0;
+}
+
+int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlx5e_hwstamp_set(priv, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx5e_hwstamp_get(priv, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+void mlx5i_dev_cleanup(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_uninit_underlay_qp(priv);
+
+ /* Delete QPN to net-device mapping from HT */
+ mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn);
+}
+
+static int mlx5i_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ int err;
+
+ mutex_lock(&epriv->state_lock);
+
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err);
+ goto err_clear_state_opened_flag;
+ }
+
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
+ goto err_reset_qp;
+ }
+
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err)
+ goto err_remove_fs_underlay_qp;
+
+ mlx5e_refresh_tirs(epriv, false);
+ mlx5e_activate_priv_channels(epriv);
+
+ mutex_unlock(&epriv->state_lock);
+ return 0;
+
+err_remove_fs_underlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_reset_qp:
+ mlx5i_uninit_underlay_qp(epriv);
+err_clear_state_opened_flag:
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
+ return err;
+}
+
+static int mlx5i_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+
+ /* May already be CLOSED in case a previous configuration operation
+ * (e.g RX/TX queue size change) that involves close&open failed.
+ */
+ mutex_lock(&epriv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &epriv->state))
+ goto unlock;
+
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ netif_carrier_off(epriv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5e_deactivate_priv_channels(epriv);
+ mlx5e_close_channels(&epriv->channels);
+ mlx5i_uninit_underlay_qp(epriv);
+unlock:
+ mutex_unlock(&epriv->state_lock);
+ return 0;
+}
+
+/* IPoIB RDMA netdev callbacks */
+static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
+ union ib_gid *gid, u16 lid, int set_qkey,
+ u32 qkey)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ int err;
+
+ mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+ err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn);
+ if (err)
+ mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
+ ipriv->qp.qpn, gid->raw);
+
+ if (set_qkey) {
+ mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
+ netdev->name, qkey);
+ ipriv->qkey = qkey;
+ }
+
+ return err;
+}
+
+static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
+ union ib_gid *gid, u16 lid)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ int err;
+
+ mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+
+ err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
+ if (err)
+ mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
+ ipriv->qp.qpn, gid->raw);
+
+ return err;
+}
+
+static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(dev);
+ struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)];
+ struct mlx5_ib_ah *mah = to_mah(address);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+
+ return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
+}
+
+static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ ipriv->pkey_index = (u16)id;
+}
+
+static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+ mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ const struct mlx5e_profile *profile = priv->profile;
+
+ mlx5e_detach_netdev(priv);
+ profile->cleanup(priv);
+ destroy_workqueue(priv->wq);
+
+ if (!ipriv->sub_interface) {
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+ mlx5e_destroy_mdev_resources(priv->mdev);
+ }
+}
+
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+ struct ib_device *ibdev,
+ const char *name,
+ void (*setup)(struct net_device *))
+{
+ const struct mlx5e_profile *profile;
+ struct net_device *netdev;
+ struct mlx5i_priv *ipriv;
+ struct mlx5e_priv *epriv;
+ struct rdma_netdev *rn;
+ bool sub_interface;
+ int nch;
+ int err;
+
+ if (mlx5i_check_required_hca_cap(mdev)) {
+ mlx5_core_warn(mdev, "Accelerated mode is not supported\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* TODO: Need to find a better way to check if child device*/
+ sub_interface = (mdev->mlx5e_res.pdn != 0);
+
+ if (sub_interface)
+ profile = mlx5i_pkey_get_profile();
+ else
+ profile = &mlx5i_nic_profile;
+
+ nch = profile->max_nch(mdev);
+
+ netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
+ name, NET_NAME_UNKNOWN,
+ setup,
+ nch * MLX5E_MAX_NUM_TC,
+ nch);
+ if (!netdev) {
+ mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
+ return NULL;
+ }
+
+ ipriv = netdev_priv(netdev);
+ epriv = mlx5i_epriv(netdev);
+
+ epriv->wq = create_singlethread_workqueue("mlx5i");
+ if (!epriv->wq)
+ goto err_free_netdev;
+
+ ipriv->sub_interface = sub_interface;
+ if (!ipriv->sub_interface) {
+ err = mlx5i_pkey_qpn_ht_init(netdev);
+ if (err) {
+ mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
+ goto destroy_wq;
+ }
+
+ /* This should only be called once per mdev */
+ err = mlx5e_create_mdev_resources(mdev);
+ if (err)
+ goto destroy_ht;
+ }
+
+ profile->init(mdev, netdev, profile, ipriv);
+
+ err = mlx5e_attach_netdev(epriv);
+ if (err)
+ goto detach;
+ netif_carrier_off(netdev);
+
+ /* set rdma_netdev func pointers */
+ rn = &ipriv->rn;
+ rn->hca = ibdev;
+ rn->send = mlx5i_xmit;
+ rn->attach_mcast = mlx5i_attach_mcast;
+ rn->detach_mcast = mlx5i_detach_mcast;
+ rn->set_id = mlx5i_set_pkey_index;
+
+ netdev->priv_destructor = mlx5_rdma_netdev_free;
+ netdev->needs_free_netdev = 1;
+
+ return netdev;
+
+detach:
+ profile->cleanup(epriv);
+ if (ipriv->sub_interface)
+ return NULL;
+ mlx5e_destroy_mdev_resources(mdev);
+destroy_ht:
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+destroy_wq:
+ destroy_workqueue(epriv->wq);
+err_free_netdev:
+ free_netdev(netdev);
+
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
new file mode 100644
index 000000000..0982c579e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_IPOB_H__
+#define __MLX5E_IPOB_H__
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+
+#define MLX5I_MAX_NUM_TC 1
+
+extern const struct ethtool_ops mlx5i_ethtool_ops;
+extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
+
+#define MLX5_IB_GRH_BYTES 40
+#define MLX5_IPOIB_ENCAP_LEN 4
+#define MLX5_IPOIB_PSEUDO_LEN 20
+#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN)
+
+/* ipoib rdma netdev's private data structure */
+struct mlx5i_priv {
+ struct rdma_netdev rn; /* keep this first */
+ struct mlx5_core_qp qp;
+ bool sub_interface;
+ u32 qkey;
+ u16 pkey_index;
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
+ char *mlx5e_priv[0];
+};
+
+/* Underlay QP create/destroy functions */
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+
+/* Underlay QP state modification init/uninit functions */
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv);
+
+/* Allocate/Free underlay QPN to net-device hash table */
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev);
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev);
+
+/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn);
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn);
+
+/* Get the net-device corresponding to the given underlay QPN */
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn);
+
+/* Shared ndo functions */
+int mlx5i_dev_init(struct net_device *dev);
+void mlx5i_dev_cleanup(struct net_device *dev);
+int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+
+/* Parent profile functions */
+void mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv);
+
+/* Get child interface nic profile */
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
+
+/* Extract mlx5e_priv from IPoIB netdev */
+#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv))
+
+struct mlx5_wqe_eth_pad {
+ u8 rsvd0[16];
+};
+
+struct mlx5i_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_datagram_seg datagram;
+ struct mlx5_wqe_eth_pad pad;
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[0];
+};
+
+static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
+ struct mlx5i_tx_wqe **wqe,
+ u16 pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
+
+netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey);
+void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+#endif /* __MLX5E_IPOB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
new file mode 100644
index 000000000..54a188f41
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include "ipoib.h"
+
+#define MLX5I_MAX_LOG_PKEY_SUP 7
+
+struct qpn_to_netdev {
+ struct net_device *netdev;
+ struct hlist_node hlist;
+ u32 underlay_qpn;
+};
+
+struct mlx5i_pkey_qpn_ht {
+ struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP];
+ spinlock_t ht_lock; /* Synchronise with NAPI */
+};
+
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
+
+ qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL);
+ if (!qpn_htbl)
+ return -ENOMEM;
+
+ ipriv->qpn_htbl = qpn_htbl;
+ spin_lock_init(&qpn_htbl->ht_lock);
+
+ return 0;
+}
+
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ kfree(ipriv->qpn_htbl);
+}
+
+static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets,
+ u32 qpn)
+{
+ struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)];
+ struct qpn_to_netdev *node;
+
+ hlist_for_each_entry(node, h, hlist) {
+ if (node->underlay_qpn == qpn)
+ return node;
+ }
+
+ return NULL;
+}
+
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP);
+ struct qpn_to_netdev *new_node;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
+
+ new_node->netdev = netdev;
+ new_node->underlay_qpn = qpn;
+ spin_lock_bh(&ht->ht_lock);
+ hlist_add_head(&new_node->hlist, &ht->buckets[key]);
+ spin_unlock_bh(&ht->ht_lock);
+
+ return 0;
+}
+
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn);
+ if (!node) {
+ mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n");
+ return -EINVAL;
+ }
+
+ spin_lock_bh(&ht->ht_lock);
+ hlist_del_init(&node->hlist);
+ spin_unlock_bh(&ht->ht_lock);
+ kfree(node);
+
+ return 0;
+}
+
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn);
+ if (!node)
+ return NULL;
+
+ return node->netdev;
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev);
+static int mlx5i_pkey_close(struct net_device *netdev);
+static int mlx5i_pkey_dev_init(struct net_device *dev);
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev);
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu);
+static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+
+static const struct net_device_ops mlx5i_pkey_netdev_ops = {
+ .ndo_open = mlx5i_pkey_open,
+ .ndo_stop = mlx5i_pkey_close,
+ .ndo_init = mlx5i_pkey_dev_init,
+ .ndo_uninit = mlx5i_pkey_dev_cleanup,
+ .ndo_change_mtu = mlx5i_pkey_change_mtu,
+ .ndo_do_ioctl = mlx5i_pkey_ioctl,
+};
+
+/* Child NDOs */
+static int mlx5i_pkey_dev_init(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+ struct net_device *parent_dev;
+ int parent_ifindex;
+
+ ipriv = priv->ppriv;
+
+ /* Get QPN to netdevice hash table from parent */
+ parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev);
+ parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex);
+ if (!parent_dev) {
+ mlx5_core_warn(priv->mdev, "failed to get parent device\n");
+ return -EINVAL;
+ }
+
+ parent_ipriv = netdev_priv(parent_dev);
+ ipriv->qpn_htbl = parent_ipriv->qpn_htbl;
+ dev_put(parent_dev);
+
+ return mlx5i_dev_init(dev);
+}
+
+static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ return mlx5i_ioctl(dev, ifr, cmd);
+}
+
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev)
+{
+ return mlx5i_dev_cleanup(netdev);
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ int err;
+
+ mutex_lock(&epriv->state_lock);
+
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err);
+ goto err_release_lock;
+ }
+
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err);
+ goto err_unint_underlay_qp;
+ }
+
+ err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]);
+ if (err) {
+ mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
+ goto err_remove_rx_uderlay_qp;
+ }
+
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err) {
+ mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
+ goto err_clear_state_opened_flag;
+ }
+ mlx5e_refresh_tirs(epriv, false);
+ mlx5e_activate_priv_channels(epriv);
+ mutex_unlock(&epriv->state_lock);
+
+ return 0;
+
+err_clear_state_opened_flag:
+ mlx5e_destroy_tis(mdev, epriv->tisn[0]);
+err_remove_rx_uderlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_unint_underlay_qp:
+ mlx5i_uninit_underlay_qp(epriv);
+err_release_lock:
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
+ return err;
+}
+
+static int mlx5i_pkey_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ netif_carrier_off(priv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5i_uninit_underlay_qp(priv);
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+ mlx5e_destroy_tis(mdev, priv->tisn[0]);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return 0;
+}
+
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ mutex_lock(&priv->state_lock);
+ netdev->mtu = new_mtu;
+ mutex_unlock(&priv->state_lock);
+
+ return 0;
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ mlx5i_init(mdev, netdev, profile, ppriv);
+
+ /* Override parent ndo */
+ netdev->netdev_ops = &mlx5i_pkey_netdev_ops;
+
+ /* Set child limited ethtool support */
+ netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops;
+
+ /* Use dummy rqs */
+ priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
+{
+ /* Do nothing .. */
+}
+
+static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+}
+
+static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource allocation and init
+ */
+ return 0;
+}
+
+static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource free and de-init
+ */
+}
+
+static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
+ .init = mlx5i_pkey_init,
+ .cleanup = mlx5i_pkey_cleanup,
+ .init_tx = mlx5i_pkey_init_tx,
+ .cleanup_tx = mlx5i_pkey_cleanup_tx,
+ .init_rx = mlx5i_pkey_init_rx,
+ .cleanup_rx = mlx5i_pkey_cleanup_rx,
+ .enable = NULL,
+ .disable = NULL,
+ .update_stats = NULL,
+ .max_nch = mlx5e_get_max_num_channels,
+ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+ .max_tc = MLX5I_MAX_NUM_TC,
+};
+
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
+{
+ return &mlx5i_pkey_nic_profile;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
new file mode 100644
index 000000000..582b2f180
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+
+enum {
+ MLX5_LAG_FLAG_BONDED = 1 << 0,
+};
+
+struct lag_func {
+ struct mlx5_core_dev *dev;
+ struct net_device *netdev;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+ enum netdev_lag_tx_type tx_type;
+ struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
+ bool is_bonded;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves both its phys functions.
+ */
+struct mlx5_lag {
+ u8 flags;
+ u8 v2p_map[MLX5_MAX_PORTS];
+ struct lag_func pf[MLX5_MAX_PORTS];
+ struct lag_tracker tracker;
+ struct delayed_work bond_work;
+ struct notifier_block nb;
+
+ /* Admin state. Allow lag only if allowed is true
+ * even if network conditions for lag were met
+ */
+ bool allowed;
+};
+
+/* General purpose, use for short periods of time.
+ * Beware of lock dependencies (preferably, no locks should be acquired
+ * under it).
+ */
+static DEFINE_MUTEX(lag_mutex);
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+ u8 remap_port2)
+{
+ u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
+ void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+
+ MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
+
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+ u8 remap_port2)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
+ void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+ MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+ MLX5_SET(modify_lag_in, in, field_select, 0x1);
+
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
+
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
+
+ MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
+
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
+
+ MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+
+static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ MLX5_SET(query_cong_statistics_in, in, clear, reset);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
+static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+{
+ return dev->priv.lag;
+}
+
+static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].netdev == ndev)
+ return i;
+
+ return -1;
+}
+
+static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+}
+
+static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+ u8 *port1, u8 *port2)
+{
+ *port1 = 1;
+ *port2 = 2;
+ if (!tracker->netdev_state[0].tx_enabled ||
+ !tracker->netdev_state[0].link_up) {
+ *port1 = 2;
+ return;
+ }
+
+ if (!tracker->netdev_state[1].tx_enabled ||
+ !tracker->netdev_state[1].link_up)
+ *port2 = 1;
+}
+
+static void mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
+
+ ldev->flags |= MLX5_LAG_FLAG_BONDED;
+
+ mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
+ &ldev->v2p_map[1]);
+
+ err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to create LAG (%d)\n",
+ err);
+}
+
+static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
+
+ ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+
+ err = mlx5_cmd_destroy_lag(dev0);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to destroy LAG (%d)\n",
+ err);
+}
+
+static void mlx5_do_bond(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
+ struct lag_tracker tracker;
+ u8 v2p_port1, v2p_port2;
+ int i, err;
+ bool do_bond;
+
+ if (!dev0 || !dev1)
+ return;
+
+ mutex_lock(&lag_mutex);
+ tracker = ldev->tracker;
+ mutex_unlock(&lag_mutex);
+
+ do_bond = tracker.is_bonded && ldev->allowed;
+
+ if (do_bond && !mlx5_lag_is_bonded(ldev)) {
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+
+ mlx5_activate_lag(ldev, &tracker);
+
+ mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_enable_roce(dev1);
+ } else if (do_bond && mlx5_lag_is_bonded(ldev)) {
+ mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
+ &v2p_port2);
+
+ if ((v2p_port1 != ldev->v2p_map[0]) ||
+ (v2p_port2 != ldev->v2p_map[1])) {
+ ldev->v2p_map[0] = v2p_port1;
+ ldev->v2p_map[1] = v2p_port2;
+
+ err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ }
+ } else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
+ mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_disable_roce(dev1);
+
+ mlx5_deactivate_lag(ldev);
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+ }
+}
+
+static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
+{
+ schedule_delayed_work(&ldev->bond_work, delay);
+}
+
+static void mlx5_do_bond_work(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
+ bond_work);
+ int status;
+
+ status = mlx5_dev_list_trylock();
+ if (!status) {
+ /* 1 sec delay. */
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_do_bond(ldev);
+ mlx5_dev_list_unlock();
+}
+
+static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *upper = info->upper_dev, *ndev_tmp;
+ struct netdev_lag_upper_info *lag_upper_info = NULL;
+ bool is_bonded;
+ int bond_status = 0;
+ int num_slaves = 0;
+ int idx;
+
+ if (!netif_is_lag_master(upper))
+ return 0;
+
+ if (info->linking)
+ lag_upper_info = info->upper_info;
+
+ /* The event may still be of interest if the slave does not belong to
+ * us, but is enslaved to a master which has one or more of our netdevs
+ * as slaves (e.g., if a new slave is added to a master that bonds two
+ * of our netdevs, we should unbond).
+ */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+ if (idx > -1)
+ bond_status |= (1 << idx);
+
+ num_slaves++;
+ }
+ rcu_read_unlock();
+
+ /* None of this lagdev's netdevs are slaves of this master. */
+ if (!(bond_status & 0x3))
+ return 0;
+
+ if (lag_upper_info)
+ tracker->tx_type = lag_upper_info->tx_type;
+
+ /* Determine bonding status:
+ * A device is considered bonded if both its physical ports are slaves
+ * of the same lag master, and only them.
+ * Lag mode must be activebackup or hash.
+ */
+ is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
+ (bond_status == 0x3) &&
+ ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
+ (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
+
+ if (tracker->is_bonded != is_bonded) {
+ tracker->is_bonded = is_bonded;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct netdev_lag_lower_state_info *lag_lower_info;
+ int idx;
+
+ if (!netif_is_lag_port(ndev))
+ return 0;
+
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
+ if (idx == -1)
+ return 0;
+
+ /* This information is used to determine virtual to physical
+ * port mapping.
+ */
+ lag_lower_info = info->lower_state_info;
+ if (!lag_lower_info)
+ return 0;
+
+ tracker->netdev_state[idx] = *lag_lower_info;
+
+ return 1;
+}
+
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct lag_tracker tracker;
+ struct mlx5_lag *ldev;
+ int changed = 0;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return NOTIFY_DONE;
+
+ if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
+ return NOTIFY_DONE;
+
+ ldev = container_of(this, struct mlx5_lag, nb);
+ tracker = ldev->tracker;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
+ ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
+ ndev, ptr);
+ break;
+ }
+
+ mutex_lock(&lag_mutex);
+ ldev->tracker = tracker;
+ mutex_unlock(&lag_mutex);
+
+ if (changed)
+ mlx5_queue_bond_work(ldev, 0);
+
+ return NOTIFY_DONE;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+ if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
+ (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
+ return false;
+ else
+ return true;
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(void)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev)
+ return NULL;
+
+ INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+ ldev->allowed = mlx5_lag_check_prereq(ldev);
+
+ return ldev;
+}
+
+static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
+{
+ kfree(ldev);
+}
+
+static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ unsigned int fn = PCI_FUNC(dev->pdev->devfn);
+
+ if (fn >= MLX5_MAX_PORTS)
+ return;
+
+ mutex_lock(&lag_mutex);
+ ldev->pf[fn].dev = dev;
+ ldev->pf[fn].netdev = netdev;
+ ldev->tracker.netdev_state[fn].link_up = 0;
+ ldev->tracker.netdev_state[fn].tx_enabled = 0;
+
+ ldev->allowed = mlx5_lag_check_prereq(ldev);
+ dev->priv.lag = ldev;
+
+ mutex_unlock(&lag_mutex);
+}
+
+static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev == dev)
+ break;
+
+ if (i == MLX5_MAX_PORTS)
+ return;
+
+ mutex_lock(&lag_mutex);
+ memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
+
+ dev->priv.lag = NULL;
+ ldev->allowed = mlx5_lag_check_prereq(ldev);
+ mutex_unlock(&lag_mutex);
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
+{
+ struct mlx5_lag *ldev = NULL;
+ struct mlx5_core_dev *tmp_dev;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
+ return;
+
+ tmp_dev = mlx5_get_next_phys_dev(dev);
+ if (tmp_dev)
+ ldev = tmp_dev->priv.lag;
+
+ if (!ldev) {
+ ldev = mlx5_lag_dev_alloc();
+ if (!ldev) {
+ mlx5_core_err(dev, "Failed to alloc lag dev\n");
+ return;
+ }
+ }
+
+ mlx5_lag_dev_add_pf(ldev, dev, netdev);
+
+ if (!ldev->nb.notifier_call) {
+ ldev->nb.notifier_call = mlx5_lag_netdev_event;
+ if (register_netdevice_notifier(&ldev->nb)) {
+ ldev->nb.notifier_call = NULL;
+ mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+ }
+ }
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_remove(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev)
+ return;
+
+ if (mlx5_lag_is_bonded(ldev))
+ mlx5_deactivate_lag(ldev);
+
+ mlx5_lag_dev_remove_pf(ldev, dev);
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ break;
+
+ if (i == MLX5_MAX_PORTS) {
+ if (ldev->nb.notifier_call)
+ unregister_netdevice_notifier(&ldev->nb);
+ cancel_delayed_work_sync(&ldev->bond_work);
+ mlx5_lag_dev_free(ldev);
+ }
+}
+
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && mlx5_lag_is_bonded(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_active);
+
+static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+{
+ struct mlx5_lag *ldev;
+ int ret = 0;
+ bool lag_active;
+
+ mlx5_dev_list_lock();
+
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+ lag_active = mlx5_lag_is_bonded(ldev);
+ if (!mlx5_lag_check_prereq(ldev) && allow) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ if (ldev->allowed == allow)
+ goto unlock;
+ ldev->allowed = allow;
+ if ((lag_active && !allow) || allow)
+ mlx5_do_bond(ldev);
+unlock:
+ mlx5_dev_list_unlock();
+ return ret;
+}
+
+int mlx5_lag_forbid(struct mlx5_core_dev *dev)
+{
+ return mlx5_lag_set_state(dev, false);
+}
+
+int mlx5_lag_allow(struct mlx5_core_dev *dev)
+{
+ return mlx5_lag_set_state(dev, true);
+}
+
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+{
+ struct net_device *ndev = NULL;
+ struct mlx5_lag *ldev;
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+
+ if (!(ldev && mlx5_lag_is_bonded(ldev)))
+ goto unlock;
+
+ if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ ndev = ldev->tracker.netdev_state[0].tx_enabled ?
+ ldev->pf[0].netdev : ldev->pf[1].netdev;
+ } else {
+ ndev = ldev->pf[0].netdev;
+ }
+ if (ndev)
+ dev_hold(ndev);
+
+unlock:
+ mutex_unlock(&lag_mutex);
+
+ return ndev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
+ priv);
+ struct mlx5_lag *ldev;
+
+ if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
+ return true;
+
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+ return true;
+
+ /* If bonded, we do not add an IB device for PF1. */
+ return false;
+}
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ u64 *values,
+ int num_counters,
+ size_t *offsets)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+ struct mlx5_lag *ldev;
+ int num_ports;
+ int ret, i, j;
+ void *out;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ memset(values, 0, sizeof(*values) * num_counters);
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ if (ldev && mlx5_lag_is_bonded(ldev)) {
+ num_ports = MLX5_MAX_PORTS;
+ mdev[0] = ldev->pf[0].dev;
+ mdev[1] = ldev->pf[1].dev;
+ } else {
+ num_ports = 1;
+ mdev[0] = dev;
+ }
+
+ for (i = 0; i < num_ports; ++i) {
+ ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+ if (ret)
+ goto unlock;
+
+ for (j = 0; j < num_counters; ++j)
+ values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+ }
+
+unlock:
+ mutex_unlock(&lag_mutex);
+ kvfree(out);
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
new file mode 100644
index 000000000..0fd62510f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -0,0 +1,616 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/highmem.h>
+#include <rdma/mlx5-abi.h>
+#include "en.h"
+#include "clock.h"
+
+enum {
+ MLX5_CYCLES_SHIFT = 23
+};
+
+enum {
+ MLX5_PIN_MODE_IN = 0x0,
+ MLX5_PIN_MODE_OUT = 0x1,
+};
+
+enum {
+ MLX5_OUT_PATTERN_PULSE = 0x0,
+ MLX5_OUT_PATTERN_PERIODIC = 0x1,
+};
+
+enum {
+ MLX5_EVENT_MODE_DISABLE = 0x0,
+ MLX5_EVENT_MODE_REPETETIVE = 0x1,
+ MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2,
+};
+
+enum {
+ MLX5_MTPPS_FS_ENABLE = BIT(0x0),
+ MLX5_MTPPS_FS_PATTERN = BIT(0x2),
+ MLX5_MTPPS_FS_PIN_MODE = BIT(0x3),
+ MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4),
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
+ MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
+};
+
+static u64 read_internal_timer(const struct cyclecounter *cc)
+{
+ struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+
+ return mlx5_read_internal_timer(mdev) & cc->mask;
+}
+
+static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 sign;
+
+ if (!clock_info)
+ return;
+
+ sign = smp_load_acquire(&clock_info->sign);
+ smp_store_mb(clock_info->sign,
+ sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
+
+ clock_info->cycles = clock->tc.cycle_last;
+ clock_info->mult = clock->cycles.mult;
+ clock_info->nsec = clock->tc.nsec;
+ clock_info->frac = clock->tc.frac;
+
+ smp_store_release(&clock_info->sign,
+ sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
+}
+
+static void mlx5_pps_out(struct work_struct *work)
+{
+ struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
+ out_work);
+ struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
+ pps_info);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ u64 tstart;
+
+ write_lock_irqsave(&clock->lock, flags);
+ tstart = clock->pps_info.start[i];
+ clock->pps_info.start[i] = 0;
+ write_unlock_irqrestore(&clock->lock, flags);
+ if (!tstart)
+ continue;
+
+ MLX5_SET(mtpps_reg, in, pin, i);
+ MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
+ MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP);
+ mlx5_set_mtpps(mdev, in, sizeof(in));
+ }
+}
+
+static void mlx5_timestamp_overflow(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock,
+ overflow_work);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_read(&clock->tc);
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+ schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
+}
+
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ u64 ns = timespec64_to_ns(ts);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_init(&clock->tc, &clock->cycles, ns);
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ u64 ns;
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ ns = timecounter_read(&clock->tc);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_adjtime(&clock->tc, delta);
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+ u64 adj;
+ u32 diff;
+ unsigned long flags;
+ int neg_adj = 0;
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+
+ if (delta < 0) {
+ neg_adj = 1;
+ delta = -delta;
+ }
+
+ adj = clock->nominal_c_mult;
+ adj *= delta;
+ diff = div_u64(adj, 1000000000ULL);
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_read(&clock->tc);
+ clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
+ clock->nominal_c_mult + diff;
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_extts_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u32 field_select = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->extts.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ if (on) {
+ pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+ if (pin < 0)
+ return -EBUSY;
+ pin_mode = MLX5_PIN_MODE_IN;
+ pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
+ field_select = MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_ENABLE;
+ } else {
+ pin = rq->extts.index;
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_perout_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u64 nsec_now, nsec_delta, time_stamp = 0;
+ u64 cycles_now, cycles_delta;
+ struct timespec64 ts;
+ unsigned long flags;
+ u32 field_select = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+ s64 ns;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->perout.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ if (on) {
+ pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT,
+ rq->perout.index);
+ if (pin < 0)
+ return -EBUSY;
+
+ pin_mode = MLX5_PIN_MODE_OUT;
+ pattern = MLX5_OUT_PATTERN_PERIODIC;
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ ns = timespec64_to_ns(&ts);
+
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+
+ ts.tv_sec = rq->perout.start.sec;
+ ts.tv_nsec = rq->perout.start.nsec;
+ ns = timespec64_to_ns(&ts);
+ cycles_now = mlx5_read_internal_timer(mdev);
+ write_lock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+ nsec_delta = ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+ clock->cycles.mult);
+ write_unlock_irqrestore(&clock->lock, flags);
+ time_stamp = cycles_now + cycles_delta;
+ field_select = MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_ENABLE |
+ MLX5_MTPPS_FS_TIME_STAMP;
+ } else {
+ pin = rq->perout.index;
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_pps_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+
+ clock->pps_info.enabled = !!on;
+ return 0;
+}
+
+static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ return mlx5_extts_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PEROUT:
+ return mlx5_perout_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PPS:
+ return mlx5_pps_configure(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+enum {
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0),
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1),
+};
+
+static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+
+ switch (func) {
+ case PTP_PF_NONE:
+ return 0;
+ case PTP_PF_EXTTS:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN);
+ case PTP_PF_PEROUT:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct ptp_clock_info mlx5_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "mlx5_p2p",
+ .max_adj = 100000000,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 0,
+ .adjfreq = mlx5_ptp_adjfreq,
+ .adjtime = mlx5_ptp_adjtime,
+ .gettime64 = mlx5_ptp_gettime,
+ .settime64 = mlx5_ptp_settime,
+ .enable = NULL,
+ .verify = NULL,
+};
+
+static int mlx5_init_pin_config(struct mlx5_clock *clock)
+{
+ int i;
+
+ clock->ptp_info.pin_config =
+ kcalloc(clock->ptp_info.n_pins,
+ sizeof(*clock->ptp_info.pin_config),
+ GFP_KERNEL);
+ if (!clock->ptp_info.pin_config)
+ return -ENOMEM;
+ clock->ptp_info.enable = mlx5_ptp_enable;
+ clock->ptp_info.verify = mlx5_ptp_verify;
+ clock->ptp_info.pps = 1;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ snprintf(clock->ptp_info.pin_config[i].name,
+ sizeof(clock->ptp_info.pin_config[i].name),
+ "mlx5_pps%d", i);
+ clock->ptp_info.pin_config[i].index = i;
+ clock->ptp_info.pin_config[i].func = PTP_PF_NONE;
+ clock->ptp_info.pin_config[i].chan = i;
+ }
+
+ return 0;
+}
+
+static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ mlx5_query_mtpps(mdev, out, sizeof(out));
+
+ clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
+ cap_number_of_pps_pins);
+ clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_in_pins);
+ clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_out_pins);
+
+ clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+ clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+ clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+ clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+ clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+ clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+ clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+ clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+}
+
+void mlx5_pps_event(struct mlx5_core_dev *mdev,
+ struct mlx5_eqe *eqe)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ struct ptp_clock_event ptp_event;
+ struct timespec64 ts;
+ u64 nsec_now, nsec_delta;
+ u64 cycles_now, cycles_delta;
+ int pin = eqe->data.pps.pin;
+ s64 ns;
+ unsigned long flags;
+
+ switch (clock->ptp_info.pin_config[pin].func) {
+ case PTP_PF_EXTTS:
+ ptp_event.index = pin;
+ ptp_event.timestamp =
+ mlx5_timecounter_cyc2time(clock,
+ be64_to_cpu(eqe->data.pps.time_stamp));
+ if (clock->pps_info.enabled) {
+ ptp_event.type = PTP_CLOCK_PPSUSR;
+ ptp_event.pps_times.ts_real =
+ ns_to_timespec64(ptp_event.timestamp);
+ } else {
+ ptp_event.type = PTP_CLOCK_EXTTS;
+ }
+ ptp_clock_event(clock->ptp, &ptp_event);
+ break;
+ case PTP_PF_PEROUT:
+ mlx5_ptp_gettime(&clock->ptp_info, &ts);
+ cycles_now = mlx5_read_internal_timer(mdev);
+ ts.tv_sec += 1;
+ ts.tv_nsec = 0;
+ ns = timespec64_to_ns(&ts);
+ write_lock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+ nsec_delta = ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+ clock->cycles.mult);
+ clock->pps_info.start[pin] = cycles_now + cycles_delta;
+ schedule_work(&clock->pps_info.out_work);
+ write_unlock_irqrestore(&clock->lock, flags);
+ break;
+ default:
+ mlx5_core_err(mdev, " Unhandled event\n");
+ }
+}
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u64 overflow_cycles;
+ u64 ns;
+ u64 frac = 0;
+ u32 dev_freq;
+
+ dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ if (!dev_freq) {
+ mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+ return;
+ }
+ rwlock_init(&clock->lock);
+ clock->cycles.read = read_internal_timer;
+ clock->cycles.shift = MLX5_CYCLES_SHIFT;
+ clock->cycles.mult = clocksource_khz2mult(dev_freq,
+ clock->cycles.shift);
+ clock->nominal_c_mult = clock->cycles.mult;
+ clock->cycles.mask = CLOCKSOURCE_MASK(41);
+ clock->mdev = mdev;
+
+ timecounter_init(&clock->tc, &clock->cycles,
+ ktime_to_ns(ktime_get_real()));
+
+ /* Calculate period in seconds to call the overflow watchdog - to make
+ * sure counter is checked at least twice every wrap around.
+ * The period is calculated as the minimum between max HW cycles count
+ * (The clock source mask) and max amount of cycles that can be
+ * multiplied by clock multiplier where the result doesn't exceed
+ * 64bits.
+ */
+ overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
+ overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
+
+ ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
+ frac, &frac);
+ do_div(ns, NSEC_PER_SEC / HZ);
+ clock->overflow_period = ns;
+
+ mdev->clock_info_page = alloc_page(GFP_KERNEL);
+ if (mdev->clock_info_page) {
+ mdev->clock_info = kmap(mdev->clock_info_page);
+ if (!mdev->clock_info) {
+ __free_page(mdev->clock_info_page);
+ mlx5_core_warn(mdev, "failed to map clock page\n");
+ } else {
+ mdev->clock_info->sign = 0;
+ mdev->clock_info->nsec = clock->tc.nsec;
+ mdev->clock_info->cycles = clock->tc.cycle_last;
+ mdev->clock_info->mask = clock->cycles.mask;
+ mdev->clock_info->mult = clock->nominal_c_mult;
+ mdev->clock_info->shift = clock->cycles.shift;
+ mdev->clock_info->frac = clock->tc.frac;
+ mdev->clock_info->overflow_period =
+ clock->overflow_period;
+ }
+ }
+
+ INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
+ INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
+ if (clock->overflow_period)
+ schedule_delayed_work(&clock->overflow_work, 0);
+ else
+ mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n");
+
+ /* Configure the PHC */
+ clock->ptp_info = mlx5_ptp_clock_info;
+
+ /* Initialize 1PPS data structures */
+ if (MLX5_PPS_CAP(mdev))
+ mlx5_get_pps_caps(mdev);
+ if (clock->ptp_info.n_pins)
+ mlx5_init_pin_config(clock);
+
+ clock->ptp = ptp_clock_register(&clock->ptp_info,
+ &mdev->pdev->dev);
+ if (IS_ERR(clock->ptp)) {
+ mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+ PTR_ERR(clock->ptp));
+ clock->ptp = NULL;
+ }
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ if (clock->ptp) {
+ ptp_clock_unregister(clock->ptp);
+ clock->ptp = NULL;
+ }
+
+ cancel_work_sync(&clock->pps_info.out_work);
+ cancel_delayed_work_sync(&clock->overflow_work);
+
+ if (mdev->clock_info) {
+ kunmap(mdev->clock_info_page);
+ __free_page(mdev->clock_info_page);
+ mdev->clock_info = NULL;
+ }
+
+ kfree(clock->ptp_info.pin_config);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
new file mode 100644
index 000000000..02e2e4575
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_CLOCK_H__
+#define __LIB_CLOCK_H__
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+void mlx5_init_clock(struct mlx5_core_dev *mdev);
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+ return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ u64 nsec;
+
+ read_lock(&clock->lock);
+ nsec = timecounter_cyc2time(&clock->tc, timestamp);
+ read_unlock(&clock->lock);
+
+ return ns_to_ktime(nsec);
+}
+
+#else
+static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
+
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+ return -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
new file mode 100644
index 000000000..7722a3f9b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+ unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+ ida_init(&dev->roce.reserved_gids.ida);
+ dev->roce.reserved_gids.start = tblsz;
+ dev->roce.reserved_gids.count = 0;
+}
+
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+ WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida));
+ dev->roce.reserved_gids.start = 0;
+ dev->roce.reserved_gids.count = 0;
+ ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
+ return -EPERM;
+ }
+ if (dev->roce.reserved_gids.start < count) {
+ mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
+ count);
+ return -ENOMEM;
+ }
+ if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+ mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count);
+ return -ENOMEM;
+ }
+
+ dev->roce.reserved_gids.start -= count;
+ dev->roce.reserved_gids.count += count;
+ mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+ dev->roce.reserved_gids.count,
+ dev->roce.reserved_gids.start);
+ return 0;
+}
+
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+ WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
+ WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+ count, dev->roce.reserved_gids.count);
+
+ dev->roce.reserved_gids.start += count;
+ dev->roce.reserved_gids.count -= count;
+ mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+ dev->roce.reserved_gids.count,
+ dev->roce.reserved_gids.start);
+}
+
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+ int end = dev->roce.reserved_gids.start +
+ dev->roce.reserved_gids.count;
+ int index = 0;
+
+ index = ida_simple_get(&dev->roce.reserved_gids.ida,
+ dev->roce.reserved_gids.start, end,
+ GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index);
+ *gid_index = index;
+ return 0;
+}
+
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+ mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+ ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index);
+}
+
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+ return dev->roce.reserved_gids.count;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+ u8 roce_version, u8 roce_l3_type, const u8 *gid,
+ const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_l3_address);
+ void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_mac_47_32);
+ int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EINVAL;
+
+ if (gid) {
+ if (vlan) {
+ MLX5_SET_RA(in_addr, vlan_valid, 1);
+ MLX5_SET_RA(in_addr, vlan_id, vlan_id);
+ }
+
+ ether_addr_copy(addr_mac, mac);
+ MLX5_SET_RA(in_addr, roce_version, roce_version);
+ MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
+ memcpy(addr_l3_addr, gid, gidsz);
+ }
+
+ if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0)
+ MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
new file mode 100644
index 000000000..7550b1cc8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
new file mode 100644
index 000000000..98359559c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_core.h"
+#include "lib/mpfs.h"
+
+/* HW L2 Table (MPFS) management */
+static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
+{
+ u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
+ u8 *in_mac_addr;
+
+ MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
+ MLX5_SET(set_l2_table_entry_in, in, table_index, index);
+
+ in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
+ ether_addr_copy(&in_mac_addr[2], mac);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
+
+ MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/* UC L2 table hash node */
+struct l2table_node {
+ struct l2addr_node node;
+ u32 index; /* index in HW l2 table */
+};
+
+struct mlx5_mpfs {
+ struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE];
+ struct mutex lock; /* Synchronize l2 table access */
+ u32 size;
+ unsigned long *bitmap;
+};
+
+static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix)
+{
+ int err = 0;
+
+ *ix = find_first_zero_bit(l2table->bitmap, l2table->size);
+ if (*ix >= l2table->size)
+ err = -ENOSPC;
+ else
+ __set_bit(*ix, l2table->bitmap);
+
+ return err;
+}
+
+static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix)
+{
+ __clear_bit(ix, l2table->bitmap);
+}
+
+int mlx5_mpfs_init(struct mlx5_core_dev *dev)
+{
+ int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
+ struct mlx5_mpfs *mpfs;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return 0;
+
+ mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
+ if (!mpfs)
+ return -ENOMEM;
+
+ mutex_init(&mpfs->lock);
+ mpfs->size = l2table_size;
+ mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size),
+ sizeof(uintptr_t), GFP_KERNEL);
+ if (!mpfs->bitmap) {
+ kfree(mpfs);
+ return -ENOMEM;
+ }
+
+ dev->priv.mpfs = mpfs;
+ return 0;
+}
+
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return;
+
+ WARN_ON(!hlist_empty(mpfs->hash));
+ kfree(mpfs->bitmap);
+ kfree(mpfs);
+}
+
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ u32 index;
+ int err;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (l2addr) {
+ err = -EEXIST;
+ goto abort;
+ }
+
+ err = alloc_l2table_index(mpfs, &index);
+ if (err)
+ goto abort;
+
+ l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
+ if (!l2addr) {
+ free_l2table_index(mpfs, index);
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ l2addr->index = index;
+ err = set_l2table_entry_cmd(dev, index, mac);
+ if (err) {
+ l2addr_hash_del(l2addr);
+ free_l2table_index(mpfs, index);
+ }
+
+ mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
+abort:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
+
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ int err = 0;
+ u32 index;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (!l2addr) {
+ err = -ENOENT;
+ goto unlock;
+ }
+
+ index = l2addr->index;
+ del_l2table_entry_cmd(dev, index);
+ l2addr_hash_del(l2addr);
+ free_l2table_index(mpfs, index);
+ mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index);
+unlock:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
new file mode 100644
index 000000000..4a7b2c320
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_MPFS_H__
+#define __MLX5_MPFS_H__
+
+#include <linux/if_ether.h>
+#include <linux/mlx5/device.h>
+
+/* L2 -mac address based- hash helpers */
+#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
+#define MLX5_L2_ADDR_HASH(addr) (addr[5])
+
+struct l2addr_node {
+ struct hlist_node hlist;
+ u8 addr[ETH_ALEN];
+};
+
+#define for_each_l2hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist)
+
+#define l2addr_hash_find(hash, mac, type) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ bool found = false; \
+ type *ptr = NULL; \
+ \
+ hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \
+ if (ether_addr_equal(ptr->node.addr, mac)) {\
+ found = true; \
+ break; \
+ } \
+ if (!found) \
+ ptr = NULL; \
+ ptr; \
+})
+
+#define l2addr_hash_add(hash, mac, type, gfp) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ type *ptr = NULL; \
+ \
+ ptr = kzalloc(sizeof(type), gfp); \
+ if (ptr) { \
+ ether_addr_copy(ptr->node.addr, mac); \
+ hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\
+ } \
+ ptr; \
+})
+
+#define l2addr_hash_del(ptr) ({ \
+ hlist_del(&(ptr)->node.hlist); \
+ kfree(ptr); \
+})
+
+#ifdef CONFIG_MLX5_MPFS
+int mlx5_mpfs_init(struct mlx5_core_dev *dev);
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
+#else /* #ifndef CONFIG_MLX5_MPFS */
+static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
+static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
new file mode 100644
index 000000000..9a8fd7621
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "vxlan.h"
+
+struct mlx5_vxlan {
+ struct mlx5_core_dev *mdev;
+ spinlock_t lock; /* protect vxlan table */
+ /* max_num_ports is usuallly 4, 16 buckets is more than enough */
+ DECLARE_HASHTABLE(htable, 4);
+ int num_ports;
+ struct mutex sync_lock; /* sync add/del port HW operations */
+};
+
+struct mlx5_vxlan_port {
+ struct hlist_node hlist;
+ atomic_t refcount;
+ u16 udp_port;
+};
+
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
+static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+ u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
+
+ MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
+ MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
+ MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
+
+ MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
+ MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+ MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static struct mlx5_vxlan_port*
+mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+
+ hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
+ if (vxlanp->udp_port == port)
+ return vxlanp;
+ }
+
+ return NULL;
+}
+
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+
+ if (!mlx5_vxlan_allowed(vxlan))
+ return NULL;
+
+ spin_lock_bh(&vxlan->lock);
+ vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+ spin_unlock_bh(&vxlan->lock);
+
+ return vxlanp;
+}
+
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ int ret = -ENOSPC;
+
+ vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
+ if (vxlanp) {
+ atomic_inc(&vxlanp->refcount);
+ return 0;
+ }
+
+ mutex_lock(&vxlan->sync_lock);
+ if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
+ mlx5_core_info(vxlan->mdev,
+ "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
+ port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
+ ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+ if (ret)
+ goto unlock;
+
+ vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
+ if (!vxlanp) {
+ ret = -ENOMEM;
+ goto err_delete_port;
+ }
+
+ vxlanp->udp_port = port;
+ atomic_set(&vxlanp->refcount, 1);
+
+ spin_lock_bh(&vxlan->lock);
+ hash_add(vxlan->htable, &vxlanp->hlist, port);
+ spin_unlock_bh(&vxlan->lock);
+
+ vxlan->num_ports++;
+ mutex_unlock(&vxlan->sync_lock);
+ return 0;
+
+err_delete_port:
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+
+unlock:
+ mutex_unlock(&vxlan->sync_lock);
+ return ret;
+}
+
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ bool remove = false;
+ int ret = 0;
+
+ mutex_lock(&vxlan->sync_lock);
+
+ spin_lock_bh(&vxlan->lock);
+ vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+ if (!vxlanp) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (atomic_dec_and_test(&vxlanp->refcount)) {
+ hash_del(&vxlanp->hlist);
+ remove = true;
+ }
+
+out_unlock:
+ spin_unlock_bh(&vxlan->lock);
+
+ if (remove) {
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+ kfree(vxlanp);
+ vxlan->num_ports--;
+ }
+
+ mutex_unlock(&vxlan->sync_lock);
+
+ return ret;
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_vxlan *vxlan;
+
+ if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
+ return ERR_PTR(-ENOTSUPP);
+
+ vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
+ if (!vxlan)
+ return ERR_PTR(-ENOMEM);
+
+ vxlan->mdev = mdev;
+ mutex_init(&vxlan->sync_lock);
+ spin_lock_init(&vxlan->lock);
+ hash_init(vxlan->htable);
+
+ /* Hardware adds 4789 by default */
+ mlx5_vxlan_add_port(vxlan, 4789);
+
+ return vxlan;
+}
+
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mlx5_vxlan_allowed(vxlan))
+ return;
+
+ /* Lockless since we are the only hash table consumers*/
+ hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
+ hash_del(&vxlanp->hlist);
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
+ kfree(vxlanp);
+ }
+
+ kfree(vxlan);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
new file mode 100644
index 000000000..8fb0eb08f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_VXLAN_H__
+#define __MLX5_VXLAN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vxlan;
+struct mlx5_vxlan_port;
+
+static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
+{
+ /* not allowed reason is encoded in vxlan pointer as error,
+ * on mlx5_vxlan_create
+ */
+ return !IS_ERR_OR_NULL(vxlan);
+}
+
+#if IS_ENABLED(CONFIG_VXLAN)
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+#else
+static inline struct mlx5_vxlan*
+mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
+static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline struct mx5_vxlan_port*
+mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return NULL; }
+#endif
+
+#endif /* __MLX5_VXLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
new file mode 100644
index 000000000..3a3b0005f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+ u16 opmod, u8 port)
+{
+ int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+ int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+ int err = -ENOMEM;
+ void *data;
+ void *resp;
+ u32 *out;
+ u32 *in;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!in || !out)
+ goto out;
+
+ MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+ MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+ MLX5_SET(mad_ifc_in, in, port, port);
+
+ data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+ memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ if (err)
+ goto out;
+
+ resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+ memcpy(outb, resp,
+ MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
+
+out:
+ kfree(out);
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
new file mode 100644
index 000000000..a2b25afa2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -0,0 +1,1721 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/debugfs.h>
+#include <linux/kmod.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+#include <linux/version.h>
+#include <net/devlink.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "lib/mpfs.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+#include "fpga/core.h"
+#include "fpga/ipsec.h"
+#include "accel/ipsec.h"
+#include "accel/tls.h"
+#include "lib/clock.h"
+#include "lib/vxlan.h"
+#include "diag/fw_tracer.h"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+unsigned int mlx5_core_debug_mask;
+module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
+MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
+
+#define MLX5_DEFAULT_PROF 2
+static unsigned int prof_sel = MLX5_DEFAULT_PROF;
+module_param_named(prof_sel, prof_sel, uint, 0444);
+MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+
+static u32 sw_owner_id[4];
+
+enum {
+ MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
+static struct mlx5_profile profile[] = {
+ [0] = {
+ .mask = 0,
+ },
+ [1] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE,
+ .log_max_qp = 12,
+ },
+ [2] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE |
+ MLX5_PROF_MASK_MR_CACHE,
+ .log_max_qp = 18,
+ .mr_cache[0] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[1] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[2] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[3] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[4] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[5] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[6] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[7] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[8] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[9] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[10] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[11] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[12] = {
+ .size = 64,
+ .limit = 32
+ },
+ .mr_cache[13] = {
+ .size = 32,
+ .limit = 16
+ },
+ .mr_cache[14] = {
+ .size = 16,
+ .limit = 8
+ },
+ .mr_cache[15] = {
+ .size = 8,
+ .limit = 4
+ },
+ },
+};
+
+#define FW_INIT_TIMEOUT_MILI 2000
+#define FW_INIT_WAIT_MS 2
+#define FW_PRE_INIT_TIMEOUT_MILI 10000
+
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+{
+ unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
+ int err = 0;
+
+ while (fw_initializing(dev)) {
+ if (time_after(jiffies, end)) {
+ err = -EBUSY;
+ break;
+ }
+ msleep(FW_INIT_WAIT_MS);
+ }
+
+ return err;
+}
+
+static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
+{
+ int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
+ driver_version);
+ u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0};
+ u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0};
+ int remaining_size = driver_ver_sz;
+ char *string;
+
+ if (!MLX5_CAP_GEN(dev, driver_version))
+ return;
+
+ string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
+
+ strncpy(string, "Linux", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, ",", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, DRIVER_NAME, remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, ",", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+
+ snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
+ (u8)((LINUX_VERSION_CODE >> 16) & 0xff), (u8)((LINUX_VERSION_CODE >> 8) & 0xff),
+ (u16)(LINUX_VERSION_CODE & 0xffff));
+
+ /*Send the command*/
+ MLX5_SET(set_driver_version_in, in, opcode,
+ MLX5_CMD_OP_SET_DRIVER_VERSION);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int set_dma_caps(struct pci_dev *pdev)
+{
+ int err;
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
+ return err;
+ }
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev,
+ "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev,
+ "Can't set consistent PCI DMA mask, aborting\n");
+ return err;
+ }
+ }
+
+ dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
+ return err;
+}
+
+static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err = 0;
+
+ mutex_lock(&dev->pci_status_mutex);
+ if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
+ err = pci_enable_device(pdev);
+ if (!err)
+ dev->pci_status = MLX5_PCI_STATUS_ENABLED;
+ }
+ mutex_unlock(&dev->pci_status_mutex);
+
+ return err;
+}
+
+static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+
+ mutex_lock(&dev->pci_status_mutex);
+ if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
+ pci_disable_device(pdev);
+ dev->pci_status = MLX5_PCI_STATUS_DISABLED;
+ }
+ mutex_unlock(&dev->pci_status_mutex);
+}
+
+static int request_bar(struct pci_dev *pdev)
+{
+ int err = 0;
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
+ return -ENODEV;
+ }
+
+ err = pci_request_regions(pdev, DRIVER_NAME);
+ if (err)
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
+
+ return err;
+}
+
+static void release_bar(struct pci_dev *pdev)
+{
+ pci_release_regions(pdev);
+}
+
+static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_eq_table *table = &priv->eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_EQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
+ if (!priv->irq_info)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev,
+ MLX5_EQ_VEC_COMP_BASE + 1, nvec,
+ PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq_info;
+ }
+
+ table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+ return 0;
+
+err_free_irq_info:
+ kfree(priv->irq_info);
+ return err;
+}
+
+static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
+ pci_free_irq_vectors(dev->pdev);
+ kfree(priv->irq_info);
+}
+
+struct mlx5_reg_host_endianness {
+ u8 he;
+ u8 rsvd[15];
+};
+
+#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
+
+enum {
+ MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
+ MLX5_DEV_CAP_FLAG_DCT,
+};
+
+static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
+{
+ switch (size) {
+ case 128:
+ return 0;
+ case 256:
+ return 1;
+ case 512:
+ return 2;
+ case 1024:
+ return 3;
+ case 2048:
+ return 4;
+ case 4096:
+ return 5;
+ default:
+ mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
+ return 0;
+ }
+}
+
+static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
+ enum mlx5_cap_type cap_type,
+ enum mlx5_cap_mode cap_mode)
+{
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *out, *hca_caps;
+ u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
+ int err;
+
+ memset(in, 0, sizeof(in));
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err) {
+ mlx5_core_warn(dev,
+ "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
+ cap_type, cap_mode, err);
+ goto query_ex;
+ }
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+
+ switch (cap_mode) {
+ case HCA_CAP_OPMOD_GET_MAX:
+ memcpy(dev->caps.hca_max[cap_type], hca_caps,
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ break;
+ case HCA_CAP_OPMOD_GET_CUR:
+ memcpy(dev->caps.hca_cur[cap_type], hca_caps,
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ break;
+ default:
+ mlx5_core_warn(dev,
+ "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
+ cap_type, cap_mode);
+ err = -EINVAL;
+ break;
+ }
+query_ex:
+ kfree(out);
+ return err;
+}
+
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
+{
+ int ret;
+
+ ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
+ if (ret)
+ return ret;
+ return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
+}
+
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
+{
+ u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
+
+ MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
+ return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+}
+
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+ void *set_ctx;
+ void *set_hca_cap;
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ int req_endianness;
+ int err;
+
+ if (MLX5_CAP_GEN(dev, atomic)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+ if (err)
+ return err;
+ } else {
+ return 0;
+ }
+
+ req_endianness =
+ MLX5_CAP_ATOMIC(dev,
+ supported_atomic_req_8B_endianness_mode_1);
+
+ if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+ return 0;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+ /* Set requestor to host endianness */
+ MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+ err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+ kfree(set_ctx);
+ return err;
+}
+
+static int handle_hca_cap(struct mlx5_core_dev *dev)
+{
+ void *set_ctx = NULL;
+ struct mlx5_profile *prof = dev->profile;
+ int err = -ENOMEM;
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_hca_cap;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ goto query_ex;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
+ if (err)
+ goto query_ex;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
+ capability);
+ memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL],
+ MLX5_ST_SZ_BYTES(cmd_hca_cap));
+
+ mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
+ mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
+ 128);
+ /* we limit the size of the pkey table to 128 entries for now */
+ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
+ to_fw_pkey_sz(dev, 128));
+
+ /* Check log_max_qp from HCA caps to set in current profile */
+ if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
+ mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
+ profile[prof_sel].log_max_qp,
+ MLX5_CAP_GEN_MAX(dev, log_max_qp));
+ profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+ }
+ if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
+ prof->log_max_qp);
+
+ /* disable cmdif checksum */
+ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
+
+ /* Enable 4K UAR only when HCA supports it and page size is bigger
+ * than 4K.
+ */
+ if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
+
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
+
+ if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
+ MLX5_SET(cmd_hca_cap,
+ set_hca_cap,
+ cache_line_128byte,
+ cache_line_size() >= 128 ? 1 : 0);
+
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
+
+ if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
+ MLX5_SET(cmd_hca_cap,
+ set_hca_cap,
+ num_vhca_ports,
+ MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
+
+ err = set_caps(dev, set_ctx, set_sz,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+
+query_ex:
+ kfree(set_ctx);
+ return err;
+}
+
+static int set_hca_ctrl(struct mlx5_core_dev *dev)
+{
+ struct mlx5_reg_host_endianness he_in;
+ struct mlx5_reg_host_endianness he_out;
+ int err;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ memset(&he_in, 0, sizeof(he_in));
+ he_in.he = MLX5_SET_HOST_ENDIANNESS;
+ err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
+ &he_out, sizeof(he_out),
+ MLX5_REG_HOST_ENDIANNESS, 0, 1);
+ return err;
+}
+
+static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
+{
+ int ret = 0;
+
+ /* Disable local_lb by default */
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ ret = mlx5_nic_vport_update_local_lb(dev, false);
+
+ return ret;
+}
+
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};
+
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, func_id);
+ return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+}
+
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};
+
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, func_id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+{
+ u32 timer_h, timer_h1, timer_l;
+
+ timer_h = ioread32be(&dev->iseg->internal_timer_h);
+ timer_l = ioread32be(&dev->iseg->internal_timer_l);
+ timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
+ if (timer_h != timer_h1) /* wrap around */
+ timer_l = ioread32be(&dev->iseg->internal_timer_l);
+
+ return (u64)timer_l | (u64)timer_h1 << 32;
+}
+
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+
+ if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+ priv->irq_info[vecidx].mask);
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+ return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+
+ irq_set_affinity_hint(irq, NULL);
+ free_cpumask_var(priv->irq_info[vecidx].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+ err = mlx5_irq_set_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ mlx5_irq_clear_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+ mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq, *n;
+ int err = -ENOENT;
+
+ spin_lock(&table->lock);
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (eq->index == vector) {
+ *eqn = eq->eqn;
+ *irqn = eq->irqn;
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&table->lock);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq;
+
+ spin_lock(&table->lock);
+ list_for_each_entry(eq, &table->comp_eqs_list, list)
+ if (eq->eqn == eqn) {
+ spin_unlock(&table->lock);
+ return eq;
+ }
+
+ spin_unlock(&table->lock);
+
+ return ERR_PTR(-ENOENT);
+}
+
+static void free_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq, *n;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (dev->rmap) {
+ free_irq_cpu_rmap(dev->rmap);
+ dev->rmap = NULL;
+ }
+#endif
+ spin_lock(&table->lock);
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ list_del(&eq->list);
+ spin_unlock(&table->lock);
+ if (mlx5_destroy_unmap_eq(dev, eq))
+ mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
+ eq->eqn);
+ kfree(eq);
+ spin_lock(&table->lock);
+ }
+ spin_unlock(&table->lock);
+}
+
+static int alloc_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_eq *eq;
+ int ncomp_vec;
+ int nent;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&table->comp_eqs_list);
+ ncomp_vec = table->num_comp_vectors;
+ nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+ dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+ if (!dev->rmap)
+ return -ENOMEM;
+#endif
+ for (i = 0; i < ncomp_vec; i++) {
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
+ MLX5_EQ_VEC_COMP_BASE + i));
+#endif
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ err = mlx5_create_map_eq(dev, eq,
+ i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
+ name, MLX5_EQ_TYPE_COMP);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
+ eq->index = i;
+ spin_lock(&table->lock);
+ list_add_tail(&eq->list, &table->comp_eqs_list);
+ spin_unlock(&table->lock);
+ }
+
+ return 0;
+
+clean:
+ free_comp_eqs(dev);
+ return err;
+}
+
+static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
+{
+ u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
+ u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
+ u32 sup_issi;
+ int err;
+
+ MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
+ err = mlx5_cmd_exec(dev, query_in, sizeof(query_in),
+ query_out, sizeof(query_out));
+ if (err) {
+ u32 syndrome;
+ u8 status;
+
+ mlx5_cmd_mbox_status(query_out, &status, &syndrome);
+ if (!status || syndrome == MLX5_DRIVER_SYND) {
+ mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
+ err, status, syndrome);
+ return err;
+ }
+
+ mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
+ dev->issi = 0;
+ return 0;
+ }
+
+ sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
+
+ if (sup_issi & (1 << 1)) {
+ u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0};
+ u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
+
+ MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
+ MLX5_SET(set_issi_in, set_in, current_issi, 1);
+ err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
+ set_out, sizeof(set_out));
+ if (err) {
+ mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
+ err);
+ return err;
+ }
+
+ dev->issi = 1;
+
+ return 0;
+ } else if (sup_issi & (1 << 0) || !sup_issi) {
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err = 0;
+
+ pci_set_drvdata(dev->pdev, dev);
+ strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
+ priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
+
+ mutex_init(&priv->pgdir_mutex);
+ INIT_LIST_HEAD(&priv->pgdir_list);
+ spin_lock_init(&priv->mkey_lock);
+
+ mutex_init(&priv->alloc_mutex);
+
+ priv->numa_node = dev_to_node(&dev->pdev->dev);
+
+ if (mlx5_debugfs_root)
+ priv->dbg_root =
+ debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root);
+
+ err = mlx5_pci_enable_device(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
+ goto err_dbg;
+ }
+
+ err = request_bar(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "error requesting BARs, aborting\n");
+ goto err_disable;
+ }
+
+ pci_set_master(pdev);
+
+ err = set_dma_caps(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
+ goto err_clr_master;
+ }
+
+ dev->iseg_base = pci_resource_start(dev->pdev, 0);
+ dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
+ if (!dev->iseg) {
+ err = -ENOMEM;
+ dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
+ goto err_clr_master;
+ }
+
+ return 0;
+
+err_clr_master:
+ pci_clear_master(dev->pdev);
+ release_bar(dev->pdev);
+err_disable:
+ mlx5_pci_disable_device(dev);
+
+err_dbg:
+ debugfs_remove(priv->dbg_root);
+ return err;
+}
+
+static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+ iounmap(dev->iseg);
+ pci_clear_master(dev->pdev);
+ release_bar(dev->pdev);
+ mlx5_pci_disable_device(dev);
+ debugfs_remove_recursive(priv->dbg_root);
+}
+
+static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ err = mlx5_query_board_id(dev);
+ if (err) {
+ dev_err(&pdev->dev, "query board id failed\n");
+ goto out;
+ }
+
+ err = mlx5_eq_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize eq\n");
+ goto out;
+ }
+
+ err = mlx5_cq_debugfs_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
+ goto err_eq_cleanup;
+ }
+
+ mlx5_init_qp_table(dev);
+
+ mlx5_init_srq_table(dev);
+
+ mlx5_init_mkey_table(dev);
+
+ mlx5_init_reserved_gids(dev);
+
+ mlx5_init_clock(dev);
+
+ dev->vxlan = mlx5_vxlan_create(dev);
+
+ err = mlx5_init_rl_table(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init rate limiting\n");
+ goto err_tables_cleanup;
+ }
+
+ err = mlx5_mpfs_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init l2 table %d\n", err);
+ goto err_rl_cleanup;
+ }
+
+ err = mlx5_eswitch_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
+ goto err_mpfs_cleanup;
+ }
+
+ err = mlx5_sriov_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
+ goto err_eswitch_cleanup;
+ }
+
+ err = mlx5_fpga_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
+ goto err_sriov_cleanup;
+ }
+
+ dev->tracer = mlx5_fw_tracer_create(dev);
+
+ return 0;
+
+err_sriov_cleanup:
+ mlx5_sriov_cleanup(dev);
+err_eswitch_cleanup:
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_mpfs_cleanup:
+ mlx5_mpfs_cleanup(dev);
+err_rl_cleanup:
+ mlx5_cleanup_rl_table(dev);
+err_tables_cleanup:
+ mlx5_vxlan_destroy(dev->vxlan);
+ mlx5_cleanup_mkey_table(dev);
+ mlx5_cleanup_srq_table(dev);
+ mlx5_cleanup_qp_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
+
+err_eq_cleanup:
+ mlx5_eq_cleanup(dev);
+
+out:
+ return err;
+}
+
+static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
+{
+ mlx5_fw_tracer_destroy(dev->tracer);
+ mlx5_fpga_cleanup(dev);
+ mlx5_sriov_cleanup(dev);
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+ mlx5_mpfs_cleanup(dev);
+ mlx5_cleanup_rl_table(dev);
+ mlx5_vxlan_destroy(dev->vxlan);
+ mlx5_cleanup_clock(dev);
+ mlx5_cleanup_reserved_gids(dev);
+ mlx5_cleanup_mkey_table(dev);
+ mlx5_cleanup_srq_table(dev);
+ mlx5_cleanup_qp_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
+ mlx5_eq_cleanup(dev);
+}
+
+static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+ bool boot)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
+ __func__);
+ goto out;
+ }
+
+ dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
+
+ /* Only PFs hold the relevant PCIe information for this query */
+ if (mlx5_core_is_pf(dev))
+ pcie_print_link_status(dev->pdev);
+
+ /* on load removing any previous indication of internal error, device is
+ * up
+ */
+ dev->state = MLX5_DEVICE_STATE_UP;
+
+ /* wait for firmware to accept initialization segments configurations
+ */
+ err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+ if (err) {
+ dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+ FW_PRE_INIT_TIMEOUT_MILI);
+ goto out_err;
+ }
+
+ err = mlx5_cmd_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
+ goto out_err;
+ }
+
+ err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+ if (err) {
+ dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
+ FW_INIT_TIMEOUT_MILI);
+ goto err_cmd_cleanup;
+ }
+
+ err = mlx5_core_enable_hca(dev, 0);
+ if (err) {
+ dev_err(&pdev->dev, "enable hca failed\n");
+ goto err_cmd_cleanup;
+ }
+
+ err = mlx5_core_set_issi(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to set issi\n");
+ goto err_disable_hca;
+ }
+
+ err = mlx5_satisfy_startup_pages(dev, 1);
+ if (err) {
+ dev_err(&pdev->dev, "failed to allocate boot pages\n");
+ goto err_disable_hca;
+ }
+
+ err = set_hca_ctrl(dev);
+ if (err) {
+ dev_err(&pdev->dev, "set_hca_ctrl failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = handle_hca_cap(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = handle_hca_cap_atomic(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_satisfy_startup_pages(dev, 0);
+ if (err) {
+ dev_err(&pdev->dev, "failed to allocate init pages\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_pagealloc_start(dev);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_cmd_init_hca(dev, sw_owner_id);
+ if (err) {
+ dev_err(&pdev->dev, "init hca failed\n");
+ goto err_pagealloc_stop;
+ }
+
+ mlx5_set_driver_version(dev);
+
+ mlx5_start_health_poll(dev);
+
+ err = mlx5_query_hca_caps(dev);
+ if (err) {
+ dev_err(&pdev->dev, "query hca failed\n");
+ goto err_stop_poll;
+ }
+
+ if (boot) {
+ err = mlx5_init_once(dev, priv);
+ if (err) {
+ dev_err(&pdev->dev, "sw objs init failed\n");
+ goto err_stop_poll;
+ }
+ }
+
+ err = mlx5_alloc_irq_vectors(dev);
+ if (err) {
+ dev_err(&pdev->dev, "alloc irq vectors failed\n");
+ goto err_cleanup_once;
+ }
+
+ dev->priv.uar = mlx5_get_uars_page(dev);
+ if (IS_ERR(dev->priv.uar)) {
+ dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+ err = PTR_ERR(dev->priv.uar);
+ goto err_disable_msix;
+ }
+
+ err = mlx5_start_eqs(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
+ goto err_put_uars;
+ }
+
+ err = mlx5_fw_tracer_init(dev->tracer);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init FW tracer\n");
+ goto err_fw_tracer;
+ }
+
+ err = alloc_comp_eqs(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
+ goto err_comp_eqs;
+ }
+
+ err = mlx5_irq_set_affinity_hints(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+ goto err_affinity_hints;
+ }
+
+ err = mlx5_fpga_device_start(dev);
+ if (err) {
+ dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+ goto err_fpga_start;
+ }
+
+ err = mlx5_accel_ipsec_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+ goto err_ipsec_start;
+ }
+
+ err = mlx5_accel_tls_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "TLS device start failed %d\n", err);
+ goto err_tls_start;
+ }
+
+ err = mlx5_init_fs(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init flow steering\n");
+ goto err_fs;
+ }
+
+ err = mlx5_core_set_hca_defaults(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set hca defaults\n");
+ goto err_fs;
+ }
+
+ err = mlx5_sriov_attach(dev);
+ if (err) {
+ dev_err(&pdev->dev, "sriov init failed %d\n", err);
+ goto err_sriov;
+ }
+
+ if (mlx5_device_registered(dev)) {
+ mlx5_attach_device(dev);
+ } else {
+ err = mlx5_register_device(dev);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
+ goto err_reg_dev;
+ }
+ }
+
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+
+ return 0;
+
+err_reg_dev:
+ mlx5_sriov_detach(dev);
+
+err_sriov:
+ mlx5_cleanup_fs(dev);
+
+err_fs:
+ mlx5_accel_tls_cleanup(dev);
+
+err_tls_start:
+ mlx5_accel_ipsec_cleanup(dev);
+
+err_ipsec_start:
+ mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
+ mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
+ free_comp_eqs(dev);
+
+err_comp_eqs:
+ mlx5_fw_tracer_cleanup(dev->tracer);
+
+err_fw_tracer:
+ mlx5_stop_eqs(dev);
+
+err_put_uars:
+ mlx5_put_uars_page(dev, priv->uar);
+
+err_disable_msix:
+ mlx5_free_irq_vectors(dev);
+
+err_cleanup_once:
+ if (boot)
+ mlx5_cleanup_once(dev);
+
+err_stop_poll:
+ mlx5_stop_health_poll(dev, boot);
+ if (mlx5_cmd_teardown_hca(dev)) {
+ dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
+ goto out_err;
+ }
+
+err_pagealloc_stop:
+ mlx5_pagealloc_stop(dev);
+
+reclaim_boot_pages:
+ mlx5_reclaim_startup_pages(dev);
+
+err_disable_hca:
+ mlx5_core_disable_hca(dev, 0);
+
+err_cmd_cleanup:
+ mlx5_cmd_cleanup(dev);
+
+out_err:
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ mutex_unlock(&dev->intf_state_mutex);
+
+ return err;
+}
+
+static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+ bool cleanup)
+{
+ int err = 0;
+
+ if (cleanup)
+ mlx5_drain_health_recovery(dev);
+
+ mutex_lock(&dev->intf_state_mutex);
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
+ __func__);
+ if (cleanup)
+ mlx5_cleanup_once(dev);
+ goto out;
+ }
+
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+
+ if (mlx5_device_registered(dev))
+ mlx5_detach_device(dev);
+
+ mlx5_sriov_detach(dev);
+ mlx5_cleanup_fs(dev);
+ mlx5_accel_ipsec_cleanup(dev);
+ mlx5_accel_tls_cleanup(dev);
+ mlx5_fpga_device_stop(dev);
+ mlx5_irq_clear_affinity_hints(dev);
+ free_comp_eqs(dev);
+ mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_stop_eqs(dev);
+ mlx5_put_uars_page(dev, priv->uar);
+ mlx5_free_irq_vectors(dev);
+ if (cleanup)
+ mlx5_cleanup_once(dev);
+ mlx5_stop_health_poll(dev, cleanup);
+ err = mlx5_cmd_teardown_hca(dev);
+ if (err) {
+ dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
+ goto out;
+ }
+ mlx5_pagealloc_stop(dev);
+ mlx5_reclaim_startup_pages(dev);
+ mlx5_core_disable_hca(dev, 0);
+ mlx5_cmd_cleanup(dev);
+
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+ return err;
+}
+
+struct mlx5_core_event_handler {
+ void (*event)(struct mlx5_core_dev *dev,
+ enum mlx5_dev_event event,
+ void *data);
+};
+
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_ESWITCH
+ .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+ .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+ .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+ .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
+ .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
+ .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
+#endif
+};
+
+#define MLX5_IB_MOD "mlx5_ib"
+static int init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct mlx5_core_dev *dev;
+ struct devlink *devlink;
+ struct mlx5_priv *priv;
+ int err;
+
+ devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
+ if (!devlink) {
+ dev_err(&pdev->dev, "kzalloc failed\n");
+ return -ENOMEM;
+ }
+
+ dev = devlink_priv(devlink);
+ priv = &dev->priv;
+ priv->pci_dev_data = id->driver_data;
+
+ pci_set_drvdata(pdev, dev);
+
+ dev->pdev = pdev;
+ dev->event = mlx5_core_event;
+ dev->profile = &profile[prof_sel];
+
+ INIT_LIST_HEAD(&priv->ctx_list);
+ spin_lock_init(&priv->ctx_lock);
+ mutex_init(&dev->pci_status_mutex);
+ mutex_init(&dev->intf_state_mutex);
+
+ INIT_LIST_HEAD(&priv->waiting_events_list);
+ priv->is_accum_events = false;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ err = init_srcu_struct(&priv->pfault_srcu);
+ if (err) {
+ dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
+ err);
+ goto clean_dev;
+ }
+#endif
+ mutex_init(&priv->bfregs.reg_head.lock);
+ mutex_init(&priv->bfregs.wc_head.lock);
+ INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
+ INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
+
+ err = mlx5_pci_init(dev, priv);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
+ goto clean_srcu;
+ }
+
+ err = mlx5_health_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
+ goto close_pci;
+ }
+
+ mlx5_pagealloc_init(dev);
+
+ err = mlx5_load_one(dev, priv, true);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
+ goto clean_health;
+ }
+
+ request_module_nowait(MLX5_IB_MOD);
+
+ err = devlink_register(devlink, &pdev->dev);
+ if (err)
+ goto clean_load;
+
+ pci_save_state(pdev);
+ return 0;
+
+clean_load:
+ mlx5_unload_one(dev, priv, true);
+clean_health:
+ mlx5_pagealloc_cleanup(dev);
+ mlx5_health_cleanup(dev);
+close_pci:
+ mlx5_pci_close(dev, priv);
+clean_srcu:
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ cleanup_srcu_struct(&priv->pfault_srcu);
+clean_dev:
+#endif
+ devlink_free(devlink);
+
+ return err;
+}
+
+static void remove_one(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_priv *priv = &dev->priv;
+
+ devlink_unregister(devlink);
+ mlx5_unregister_device(dev);
+
+ if (mlx5_unload_one(dev, priv, true)) {
+ dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
+ mlx5_health_cleanup(dev);
+ return;
+ }
+
+ mlx5_pagealloc_cleanup(dev);
+ mlx5_health_cleanup(dev);
+ mlx5_pci_close(dev, priv);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ cleanup_srcu_struct(&priv->pfault_srcu);
+#endif
+ devlink_free(devlink);
+}
+
+static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_priv *priv = &dev->priv;
+
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+
+ mlx5_enter_error_state(dev, false);
+ mlx5_unload_one(dev, priv, false);
+ /* In case of kernel call drain the health wq */
+ if (state) {
+ mlx5_drain_health_wq(dev);
+ mlx5_pci_disable_device(dev);
+ }
+
+ return state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+/* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
+ */
+static int wait_vital(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_health *health = &dev->priv.health;
+ const int niter = 100;
+ u32 last_count = 0;
+ u32 count;
+ int i;
+
+ for (i = 0; i < niter; i++) {
+ count = ioread32be(health->health_counter);
+ if (count && count != 0xffffffff) {
+ if (last_count && last_count != count) {
+ dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+ return 0;
+ }
+ last_count = count;
+ }
+ msleep(50);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+
+ err = mlx5_pci_enable_device(dev);
+ if (err) {
+ dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+ , __func__, err);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ if (wait_vital(pdev)) {
+ dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_priv *priv = &dev->priv;
+ int err;
+
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+
+ err = mlx5_load_one(dev, priv, false);
+ if (err)
+ dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
+ , __func__, err);
+ else
+ dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+}
+
+static const struct pci_error_handlers mlx5_err_handler = {
+ .error_detected = mlx5_pci_err_detected,
+ .slot_reset = mlx5_pci_slot_reset,
+ .resume = mlx5_pci_resume
+};
+
+static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
+{
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev, force_teardown)) {
+ mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
+ return -EAGAIN;
+ }
+
+ /* Panic tear down fw command will stop the PCI bus communication
+ * with the HCA, so the health polll is no longer needed.
+ */
+ mlx5_drain_health_wq(dev);
+ mlx5_stop_health_poll(dev, false);
+
+ ret = mlx5_cmd_force_teardown_hca(dev);
+ if (ret) {
+ mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+ mlx5_start_health_poll(dev);
+ return ret;
+ }
+
+ mlx5_enter_error_state(dev, true);
+
+ /* Some platforms requiring freeing the IRQ's in the shutdown
+ * flow. If they aren't freed they can't be allocated after
+ * kexec. There is no need to cleanup the mlx5_core software
+ * contexts.
+ */
+ mlx5_irq_clear_affinity_hints(dev);
+ mlx5_core_eq_free_irqs(dev);
+
+ return 0;
+}
+
+static void shutdown(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_priv *priv = &dev->priv;
+ int err;
+
+ dev_info(&pdev->dev, "Shutdown was called\n");
+ err = mlx5_try_fast_unload(dev);
+ if (err)
+ mlx5_unload_one(dev, priv, false);
+ mlx5_pci_disable_device(dev);
+}
+
+static const struct pci_device_id mlx5_core_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
+ { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
+ { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
+ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */
+ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */
+ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */
+ { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */
+ { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */
+ { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */
+ { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+ mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+void mlx5_recover_device(struct mlx5_core_dev *dev)
+{
+ mlx5_pci_disable_device(dev);
+ if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
+ mlx5_pci_resume(dev->pdev);
+}
+
+static struct pci_driver mlx5_core_driver = {
+ .name = DRIVER_NAME,
+ .id_table = mlx5_core_pci_table,
+ .probe = init_one,
+ .remove = remove_one,
+ .shutdown = shutdown,
+ .err_handler = &mlx5_err_handler,
+ .sriov_configure = mlx5_core_sriov_configure,
+};
+
+static void mlx5_core_verify_params(void)
+{
+ if (prof_sel >= ARRAY_SIZE(profile)) {
+ pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n",
+ prof_sel,
+ ARRAY_SIZE(profile) - 1,
+ MLX5_DEFAULT_PROF);
+ prof_sel = MLX5_DEFAULT_PROF;
+ }
+}
+
+static int __init init(void)
+{
+ int err;
+
+ get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
+
+ mlx5_core_verify_params();
+ mlx5_fpga_ipsec_build_fs_cmds();
+ mlx5_register_debugfs();
+
+ err = pci_register_driver(&mlx5_core_driver);
+ if (err)
+ goto err_debug;
+
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5e_init();
+#endif
+
+ return 0;
+
+err_debug:
+ mlx5_unregister_debugfs();
+ return err;
+}
+
+static void __exit cleanup(void)
+{
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5e_cleanup();
+#endif
+ pci_unregister_driver(&mlx5_core_driver);
+ mlx5_unregister_debugfs();
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
new file mode 100644
index 000000000..ba2b09cc1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+
+int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+ u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {0};
+ void *gid;
+
+ MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+ MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+ gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_attach_mcg);
+
+int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+ u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {0};
+ void *gid;
+
+ MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+ gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
new file mode 100644
index 000000000..b4134fa0b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_CORE_H__
+#define __MLX5_CORE_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/if_link.h>
+#include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
+
+#define DRIVER_NAME "mlx5_core"
+#define DRIVER_VERSION "5.0-0"
+
+extern uint mlx5_core_debug_mask;
+
+#define mlx5_core_dbg(__dev, format, ...) \
+ dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_dbg_once(__dev, format, ...) \
+ dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_dbg_mask(__dev, mask, format, ...) \
+do { \
+ if ((mask) & mlx5_core_debug_mask) \
+ mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \
+} while (0)
+
+#define mlx5_core_err(__dev, format, ...) \
+ dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_err_rl(__dev, format, ...) \
+ dev_err_ratelimited(&(__dev)->pdev->dev, \
+ "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_warn(__dev, format, ...) \
+ dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_info(__dev, format, ...) \
+ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
+
+enum {
+ MLX5_CMD_DATA, /* print command payload only */
+ MLX5_CMD_TIME, /* print command execution time */
+};
+
+enum {
+ MLX5_DRIVER_STATUS_ABORTED = 0xfe,
+ MLX5_DRIVER_SYND = 0xbadd00de,
+};
+
+int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
+int mlx5_query_board_id(struct mlx5_core_dev *dev);
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+ unsigned long param);
+void mlx5_core_page_fault(struct mlx5_core_dev *dev,
+ struct mlx5_pagefault *pfault);
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
+void mlx5_disable_device(struct mlx5_core_dev *dev);
+void mlx5_recover_device(struct mlx5_core_dev *dev);
+int mlx5_sriov_init(struct mlx5_core_dev *dev);
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
+int mlx5_sriov_attach(struct mlx5_core_dev *dev);
+void mlx5_sriov_detach(struct mlx5_core_dev *dev);
+int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *context, u32 *element_id);
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *context, u32 element_id,
+ u32 modify_bitmask);
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ u32 element_id);
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
+void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
+ u8 access_reg_group);
+int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group,
+ u8 access_reg_group);
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+ u8 feature_group, u8 access_reg_group);
+
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove(struct mlx5_core_dev *dev);
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_attach_device(struct mlx5_core_dev *dev);
+void mlx5_detach_device(struct mlx5_core_dev *dev);
+bool mlx5_device_registered(struct mlx5_core_dev *dev);
+int mlx5_register_device(struct mlx5_core_dev *dev);
+void mlx5_unregister_device(struct mlx5_core_dev *dev);
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
+void mlx5_dev_list_lock(void);
+void mlx5_dev_list_unlock(void);
+int mlx5_dev_list_trylock(void);
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+ int header_type,
+ size_t size,
+ void *encap_header,
+ u32 *encap_id);
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
+
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 namespace, u8 num_actions,
+ void *modify_actions, u32 *modify_header_id);
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
+
+int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size);
+int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
+int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
+int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
+
+#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \
+ MLX5_CAP_GEN((mdev), pps_modify) && \
+ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \
+ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
+
+int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw);
+
+void mlx5e_init(void);
+void mlx5e_cleanup(void);
+
+static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
+{
+ /* LACP owner conditions:
+ * 1) Function is physical.
+ * 2) LAG is supported by FW.
+ * 3) LAG is managed by driver (currently the only option).
+ */
+ return MLX5_CAP_GEN(dev, vport_group_manager) &&
+ (MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
+ MLX5_CAP_GEN(dev, lag_master);
+}
+
+int mlx5_lag_allow(struct mlx5_core_dev *dev);
+int mlx5_lag_forbid(struct mlx5_core_dev *dev);
+
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
new file mode 100644
index 000000000..0670165af
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+
+ memset(table, 0, sizeof(*table));
+ rwlock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+}
+
+void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
+{
+}
+
+int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
+ struct mlx5_core_mkey *mkey,
+ u32 *in, int inlen,
+ u32 *out, int outlen,
+ mlx5_cmd_cbk_t callback, void *context)
+{
+ struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+ u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+ u32 mkey_index;
+ void *mkc;
+ int err;
+ u8 key;
+
+ spin_lock_irq(&dev->priv.mkey_lock);
+ key = dev->priv.mkey_key++;
+ spin_unlock_irq(&dev->priv.mkey_lock);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+ MLX5_SET(mkc, mkc, mkey_7_0, key);
+
+ if (callback)
+ return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+ callback, context);
+
+ err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
+ if (err)
+ return err;
+
+ mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
+ mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+ mkey->size = MLX5_GET64(mkc, mkc, len);
+ mkey->key = mlx5_idx_to_mkey(mkey_index) | key;
+ mkey->pd = MLX5_GET(mkc, mkc, pd);
+
+ mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
+ mkey_index, key, mkey->key);
+
+ /* connect to mkey tree */
+ write_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
+ write_unlock_irq(&table->lock);
+ if (err) {
+ mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+ mlx5_base_mkey(mkey->key), err);
+ mlx5_core_destroy_mkey(dev, mkey);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
+
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+ struct mlx5_core_mkey *mkey,
+ u32 *in, int inlen)
+{
+ return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+ NULL, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(mlx5_core_create_mkey);
+
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
+ struct mlx5_core_mkey *mkey)
+{
+ struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+ u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
+ struct mlx5_core_mkey *deleted_mkey;
+ unsigned long flags;
+
+ write_lock_irqsave(&table->lock, flags);
+ deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
+ write_unlock_irqrestore(&table->lock, flags);
+ if (!deleted_mkey) {
+ mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+ mlx5_base_mkey(mkey->key));
+ return -ENOENT;
+ }
+
+ MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_mkey);
+
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0};
+
+ memset(out, 0, outlen);
+ MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
+ MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_mkey);
+
+static inline u32 mlx5_get_psv(u32 *out, int psv_index)
+{
+ switch (psv_index) {
+ case 1: return MLX5_GET(create_psv_out, out, psv1_index);
+ case 2: return MLX5_GET(create_psv_out, out, psv2_index);
+ case 3: return MLX5_GET(create_psv_out, out, psv3_index);
+ default: return MLX5_GET(create_psv_out, out, psv0_index);
+ }
+}
+
+int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
+ int npsvs, u32 *sig_index)
+{
+ u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {0};
+ int i, err;
+
+ if (npsvs > MLX5_MAX_PSVS)
+ return -EINVAL;
+
+ MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV);
+ MLX5_SET(create_psv_in, in, pd, pdn);
+ MLX5_SET(create_psv_in, in, num_psv, npsvs);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ for (i = 0; i < npsvs; i++)
+ sig_index[i] = mlx5_get_psv(out, i);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_psv);
+
+int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0};
+
+ MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, in, psvn, psv_num);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_psv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
new file mode 100644
index 000000000..9c3653e06
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+ MLX5_PAGES_CANT_GIVE = 0,
+ MLX5_PAGES_GIVE = 1,
+ MLX5_PAGES_TAKE = 2
+};
+
+struct mlx5_pages_req {
+ struct mlx5_core_dev *dev;
+ u16 func_id;
+ s32 npages;
+ struct work_struct work;
+};
+
+struct fw_page {
+ struct rb_node rb_node;
+ u64 addr;
+ struct page *page;
+ u16 func_id;
+ unsigned long bitmask;
+ struct list_head list;
+ unsigned free_count;
+};
+
+enum {
+ MAX_RECLAIM_TIME_MSECS = 5000,
+ MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
+};
+
+enum {
+ MLX5_MAX_RECLAIM_TIME_MILI = 5000,
+ MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+};
+
+static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+{
+ struct rb_root *root = &dev->priv.page_root;
+ struct rb_node **new = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct fw_page *nfp;
+ struct fw_page *tfp;
+ int i;
+
+ while (*new) {
+ parent = *new;
+ tfp = rb_entry(parent, struct fw_page, rb_node);
+ if (tfp->addr < addr)
+ new = &parent->rb_left;
+ else if (tfp->addr > addr)
+ new = &parent->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
+ if (!nfp)
+ return -ENOMEM;
+
+ nfp->addr = addr;
+ nfp->page = page;
+ nfp->func_id = func_id;
+ nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+ for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+ set_bit(i, &nfp->bitmask);
+
+ rb_link_node(&nfp->rb_node, parent, new);
+ rb_insert_color(&nfp->rb_node, root);
+ list_add(&nfp->list, &dev->priv.free_list);
+
+ return 0;
+}
+
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
+{
+ struct rb_root *root = &dev->priv.page_root;
+ struct rb_node *tmp = root->rb_node;
+ struct fw_page *result = NULL;
+ struct fw_page *tfp;
+
+ while (tmp) {
+ tfp = rb_entry(tmp, struct fw_page, rb_node);
+ if (tfp->addr < addr) {
+ tmp = tmp->rb_left;
+ } else if (tfp->addr > addr) {
+ tmp = tmp->rb_right;
+ } else {
+ result = tfp;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
+ s32 *npages, int boot)
+{
+ u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0};
+ int err;
+
+ MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+ MLX5_SET(query_pages_in, in, op_mod, boot ?
+ MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
+ MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *npages = MLX5_GET(query_pages_out, out, num_pages);
+ *func_id = MLX5_GET(query_pages_out, out, function_id);
+
+ return err;
+}
+
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+{
+ struct fw_page *fp;
+ unsigned n;
+
+ if (list_empty(&dev->priv.free_list))
+ return -ENOMEM;
+
+ fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+ n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+ if (n >= MLX5_NUM_4K_IN_PAGE) {
+ mlx5_core_warn(dev, "alloc 4k bug\n");
+ return -ENOENT;
+ }
+ clear_bit(n, &fp->bitmask);
+ fp->free_count--;
+ if (!fp->free_count)
+ list_del(&fp->list);
+
+ *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
+
+ return 0;
+}
+
+#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+{
+ struct fw_page *fwp;
+ int n;
+
+ fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
+ if (!fwp) {
+ mlx5_core_warn(dev, "page not found\n");
+ return;
+ }
+
+ n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+ fwp->free_count++;
+ set_bit(n, &fwp->bitmask);
+ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+ rb_erase(&fwp->rb_node, &dev->priv.page_root);
+ if (fwp->free_count != 1)
+ list_del(&fwp->list);
+ dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(fwp->page);
+ kfree(fwp);
+ } else if (fwp->free_count == 1) {
+ list_add(&fwp->list, &dev->priv.free_list);
+ }
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+{
+ struct page *page;
+ u64 zero_addr = 1;
+ u64 addr;
+ int err;
+ int nid = dev_to_node(&dev->pdev->dev);
+
+ page = alloc_pages_node(nid, GFP_HIGHUSER, 0);
+ if (!page) {
+ mlx5_core_warn(dev, "failed to allocate page\n");
+ return -ENOMEM;
+ }
+map:
+ addr = dma_map_page(&dev->pdev->dev, page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&dev->pdev->dev, addr)) {
+ mlx5_core_warn(dev, "failed dma mapping page\n");
+ err = -ENOMEM;
+ goto err_mapping;
+ }
+
+ /* Firmware doesn't support page with physical address 0 */
+ if (addr == 0) {
+ zero_addr = addr;
+ goto map;
+ }
+
+ err = insert_page(dev, addr, page, func_id);
+ if (err) {
+ mlx5_core_err(dev, "failed to track allocated page\n");
+ dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+
+err_mapping:
+ if (err)
+ __free_page(page);
+
+ if (zero_addr == 0)
+ dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return err;
+}
+
+static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
+ int err;
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
+ func_id, err);
+}
+
+static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ int notify_fail)
+{
+ u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+ int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
+ u64 addr;
+ int err;
+ u32 *in;
+ int i;
+
+ inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
+ goto out_free;
+ }
+
+ for (i = 0; i < npages; i++) {
+retry:
+ err = alloc_4k(dev, &addr);
+ if (err) {
+ if (err == -ENOMEM)
+ err = alloc_system_page(dev, func_id);
+ if (err)
+ goto out_4k;
+
+ goto retry;
+ }
+ MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
+ }
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err) {
+ mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
+ func_id, npages, err);
+ goto out_4k;
+ }
+
+ dev->priv.fw_pages += npages;
+ if (func_id)
+ dev->priv.vfs_pages += npages;
+
+ mlx5_core_dbg(dev, "err %d\n", err);
+
+ kvfree(in);
+ return 0;
+
+out_4k:
+ for (i--; i >= 0; i--)
+ free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
+out_free:
+ kvfree(in);
+ if (notify_fail)
+ page_notify_fail(dev, func_id);
+ return err;
+}
+
+static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index,
+ u32 npages)
+{
+ u32 pages_set = 0;
+ unsigned int n;
+
+ for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) {
+ MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set,
+ fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE));
+ pages_set++;
+
+ if (!--npages)
+ break;
+ }
+
+ return pages_set;
+}
+
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+ u32 *in, int in_size, u32 *out, int out_size)
+{
+ struct fw_page *fwp;
+ struct rb_node *p;
+ u32 func_id;
+ u32 npages;
+ u32 i = 0;
+
+ if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return mlx5_cmd_exec(dev, in, in_size, out, out_size);
+
+ /* No hard feelings, we want our pages back! */
+ npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+ func_id = MLX5_GET(manage_pages_in, in, function_id);
+
+ p = rb_first(&dev->priv.page_root);
+ while (p && i < npages) {
+ fwp = rb_entry(p, struct fw_page, rb_node);
+ p = rb_next(p);
+ if (fwp->func_id != func_id)
+ continue;
+
+ i += fwp_fill_manage_pages_out(fwp, out, i, npages - i);
+ }
+
+ MLX5_SET(manage_pages_out, out, output_num_entries, i);
+ return 0;
+}
+
+static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+ int *nclaimed)
+{
+ int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
+ int num_claimed;
+ u32 *out;
+ int err;
+ int i;
+
+ if (nclaimed)
+ *nclaimed = 0;
+
+ outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
+ mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
+ err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
+ if (err) {
+ mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
+ goto out_free;
+ }
+
+ num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
+ if (num_claimed > npages) {
+ mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
+ num_claimed, npages);
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ for (i = 0; i < num_claimed; i++)
+ free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+
+ if (nclaimed)
+ *nclaimed = num_claimed;
+
+ dev->priv.fw_pages -= num_claimed;
+ if (func_id)
+ dev->priv.vfs_pages -= num_claimed;
+
+out_free:
+ kvfree(out);
+ return err;
+}
+
+static void pages_work_handler(struct work_struct *work)
+{
+ struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
+ struct mlx5_core_dev *dev = req->dev;
+ int err = 0;
+
+ if (req->npages < 0)
+ err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
+ else if (req->npages > 0)
+ err = give_pages(dev, req->func_id, req->npages, 1);
+
+ if (err)
+ mlx5_core_warn(dev, "%s fail %d\n",
+ req->npages < 0 ? "reclaim" : "give", err);
+
+ kfree(req);
+}
+
+void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
+ s32 npages)
+{
+ struct mlx5_pages_req *req;
+
+ req = kzalloc(sizeof(*req), GFP_ATOMIC);
+ if (!req) {
+ mlx5_core_warn(dev, "failed to allocate pages request\n");
+ return;
+ }
+
+ req->dev = dev;
+ req->func_id = func_id;
+ req->npages = npages;
+ INIT_WORK(&req->work, pages_work_handler);
+ queue_work(dev->priv.pg_wq, &req->work);
+}
+
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
+{
+ u16 uninitialized_var(func_id);
+ s32 uninitialized_var(npages);
+ int err;
+
+ err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
+ if (err)
+ return err;
+
+ mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
+ npages, boot ? "boot" : "init", func_id);
+
+ return give_pages(dev, func_id, npages, 0);
+}
+
+enum {
+ MLX5_BLKS_FOR_RECLAIM_PAGES = 12
+};
+
+static int optimal_reclaimed_pages(void)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_layout *lay;
+ int ret;
+
+ ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
+ MLX5_ST_SZ_BYTES(manage_pages_out)) /
+ MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
+
+ return ret;
+}
+
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+ unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+ struct fw_page *fwp;
+ struct rb_node *p;
+ int nclaimed = 0;
+ int err = 0;
+
+ do {
+ p = rb_first(&dev->priv.page_root);
+ if (p) {
+ fwp = rb_entry(p, struct fw_page, rb_node);
+ err = reclaim_pages(dev, fwp->func_id,
+ optimal_reclaimed_pages(),
+ &nclaimed);
+
+ if (err) {
+ mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
+ err);
+ return err;
+ }
+ if (nclaimed)
+ end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+ }
+ if (time_after(jiffies, end)) {
+ mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
+ break;
+ }
+ } while (p);
+
+ WARN(dev->priv.fw_pages,
+ "FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.fw_pages);
+ WARN(dev->priv.vfs_pages,
+ "VFs FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.vfs_pages);
+
+ return 0;
+}
+
+void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+{
+ dev->priv.page_root = RB_ROOT;
+ INIT_LIST_HEAD(&dev->priv.free_list);
+}
+
+void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
+{
+ /* nothing */
+}
+
+int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+{
+ dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+ if (!dev->priv.pg_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
+{
+ destroy_workqueue(dev->priv.pg_wq);
+}
+
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+{
+ unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ int prev_vfs_pages = dev->priv.vfs_pages;
+
+ /* In case of internal error we will free the pages manually later */
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_core_warn(dev, "Skipping wait for vf pages stage");
+ return 0;
+ }
+
+ mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
+ dev->priv.name);
+ while (dev->priv.vfs_pages) {
+ if (time_after(jiffies, end)) {
+ mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+ return -ETIMEDOUT;
+ }
+ if (dev->priv.vfs_pages < prev_vfs_pages) {
+ end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ prev_vfs_pages = dev->priv.vfs_pages;
+ }
+ msleep(50);
+ }
+
+ mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
new file mode 100644
index 000000000..bd830d8d6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0};
+ int err;
+
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *pdn = MLX5_GET(alloc_pd_out, out, pd);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_pd);
+
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0};
+
+ MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, in, pd, pdn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
new file mode 100644
index 000000000..09b6b1bfb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -0,0 +1,1116 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+ int size_in, void *data_out, int size_out,
+ u16 reg_id, int arg, int write)
+{
+ int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
+ int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
+ int err = -ENOMEM;
+ u32 *out = NULL;
+ u32 *in = NULL;
+ void *data;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!in || !out)
+ goto out;
+
+ data = MLX5_ADDR_OF(access_register_in, in, register_data);
+ memcpy(data, data_in, size_in);
+
+ MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+ MLX5_SET(access_register_in, in, op_mod, !write);
+ MLX5_SET(access_register_in, in, argument, arg);
+ MLX5_SET(access_register_in, in, register_id, reg_id);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ if (err)
+ goto out;
+
+ data = MLX5_ADDR_OF(access_register_out, out, register_data);
+ memcpy(data_out, data, size_out);
+
+out:
+ kvfree(out);
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
+
+int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
+ u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(pcam_reg);
+
+ MLX5_SET(pcam_reg, in, feature_group, feature_group);
+ MLX5_SET(pcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0);
+}
+
+int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group,
+ u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(mcam_reg);
+
+ MLX5_SET(mcam_reg, in, feature_group, feature_group);
+ MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0);
+}
+
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+ u8 feature_group, u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(qcam_reg);
+
+ MLX5_SET(qcam_reg, in, feature_group, feature_group);
+ MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0);
+}
+
+struct mlx5_reg_pcap {
+ u8 rsvd0;
+ u8 port_num;
+ u8 rsvd1[2];
+ __be32 caps_127_96;
+ __be32 caps_95_64;
+ __be32 caps_63_32;
+ __be32 caps_31_0;
+};
+
+int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps)
+{
+ struct mlx5_reg_pcap in;
+ struct mlx5_reg_pcap out;
+
+ memset(&in, 0, sizeof(in));
+ in.caps_127_96 = cpu_to_be32(caps);
+ in.port_num = port_num;
+
+ return mlx5_core_access_reg(dev, &in, sizeof(in), &out,
+ sizeof(out), MLX5_REG_PCAP, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_caps);
+
+int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
+ int ptys_size, int proto_mask, u8 local_port)
+{
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+
+ MLX5_SET(ptys_reg, in, local_port, local_port);
+ MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
+ return mlx5_core_access_reg(dev, in, sizeof(in), ptys,
+ ptys_size, MLX5_REG_PTYS, 0, 0);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_ptys);
+
+int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
+{
+ u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(mlcr_reg)];
+
+ MLX5_SET(mlcr_reg, in, local_port, 1);
+ MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MLCR, 0, 1);
+}
+
+int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
+ u32 *proto_cap, int proto_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
+ if (err)
+ return err;
+
+ if (proto_mask == MLX5_PTYS_EN)
+ *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
+ else
+ *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap);
+
+int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
+ u32 *proto_admin, int proto_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
+ if (err)
+ return err;
+
+ if (proto_mask == MLX5_PTYS_EN)
+ *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
+ else
+ *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin);
+
+int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
+ u8 *link_width_oper, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port);
+ if (err)
+ return err;
+
+ *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
+
+int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
+ u32 *proto_oper, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
+ local_port);
+ if (err)
+ return err;
+
+ *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
+
+int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
+ u8 *proto_oper, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB,
+ local_port);
+ if (err)
+ return err;
+
+ *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
+
+int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, int proto_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+ u8 an_disable_admin;
+ u8 an_disable_cap;
+ u8 an_status;
+
+ mlx5_query_port_autoneg(dev, proto_mask, &an_status,
+ &an_disable_cap, &an_disable_admin);
+ if (!an_disable_cap && an_disable)
+ return -EPERM;
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(ptys_reg, in, local_port, 1);
+ MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+ MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
+ if (proto_mask == MLX5_PTYS_EN)
+ MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+ else
+ MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_ptys);
+
+/* This function should be used after setting a port register only */
+void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
+{
+ enum mlx5_port_status ps;
+
+ mlx5_query_port_admin_status(dev, &ps);
+ mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN);
+ if (ps == MLX5_PORT_UP)
+ mlx5_set_port_admin_status(dev, MLX5_PORT_UP);
+}
+EXPORT_SYMBOL_GPL(mlx5_toggle_port_link);
+
+int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
+ enum mlx5_port_status status)
+{
+ u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(paos_reg)];
+
+ MLX5_SET(paos_reg, in, local_port, 1);
+ MLX5_SET(paos_reg, in, admin_status, status);
+ MLX5_SET(paos_reg, in, ase, 1);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PAOS, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status);
+
+int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
+ enum mlx5_port_status *status)
+{
+ u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(paos_reg)];
+ int err;
+
+ MLX5_SET(paos_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PAOS, 0, 0);
+ if (err)
+ return err;
+ *status = MLX5_GET(paos_reg, out, admin_status);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status);
+
+static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
+ u16 *max_mtu, u16 *oper_mtu, u8 port)
+{
+ u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+
+ MLX5_SET(pmtu_reg, in, local_port, port);
+ mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PMTU, 0, 0);
+
+ if (max_mtu)
+ *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu);
+ if (oper_mtu)
+ *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu);
+ if (admin_mtu)
+ *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu);
+}
+
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port)
+{
+ u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+
+ MLX5_SET(pmtu_reg, in, admin_mtu, mtu);
+ MLX5_SET(pmtu_reg, in, local_port, port);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PMTU, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_mtu);
+
+void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu,
+ u8 port)
+{
+ mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu);
+
+void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
+ u8 port)
+{
+ mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu);
+
+static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
+{
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)];
+ int module_mapping;
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return err;
+
+ module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping);
+ *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK;
+
+ return 0;
+}
+
+int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
+ u16 offset, u16 size, u8 *data)
+{
+ u32 out[MLX5_ST_SZ_DW(mcia_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcia_reg)];
+ int module_num;
+ u16 i2c_addr;
+ int status;
+ int err;
+ void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
+
+ err = mlx5_query_module_num(dev, &module_num);
+ if (err)
+ return err;
+
+ memset(in, 0, sizeof(in));
+ size = min_t(int, size, MLX5_EEPROM_MAX_BYTES);
+
+ if (offset < MLX5_EEPROM_PAGE_LENGTH &&
+ offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+ size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
+
+ i2c_addr = MLX5_I2C_ADDR_LOW;
+
+ MLX5_SET(mcia_reg, in, l, 0);
+ MLX5_SET(mcia_reg, in, module, module_num);
+ MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr);
+ MLX5_SET(mcia_reg, in, page_number, 0);
+ MLX5_SET(mcia_reg, in, device_address, offset);
+ MLX5_SET(mcia_reg, in, size, size);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCIA, 0, 0);
+ if (err)
+ return err;
+
+ status = MLX5_GET(mcia_reg, out, status);
+ if (status) {
+ mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n",
+ status);
+ return -EIO;
+ }
+
+ memcpy(data, ptr, size);
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom);
+
+static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc,
+ int pvlc_size, u8 local_port)
+{
+ u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0};
+
+ MLX5_SET(pvlc_reg, in, local_port, local_port);
+ return mlx5_core_access_reg(dev, in, sizeof(in), pvlc,
+ pvlc_size, MLX5_REG_PVLC, 0, 0);
+}
+
+int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
+ u8 *vl_hw_cap, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(pvlc_reg)];
+ int err;
+
+ err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port);
+ if (err)
+ return err;
+
+ *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap);
+
+int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
+ u8 port_num, void *out, size_t sz)
+{
+ u32 *in;
+ int err;
+
+ in = kvzalloc(sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(ppcnt_reg, in, local_port, port_num);
+
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
+ err = mlx5_core_access_reg(dev, in, sz, out,
+ sz, MLX5_REG_PPCNT, 0, 0);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
+
+static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
+ u32 out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ out_size, MLX5_REG_PFCC, 0, 0);
+}
+
+int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pptx, tx_pause);
+ MLX5_SET(pfcc_reg, in, pprx, rx_pause);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_pause);
+
+int mlx5_query_port_pause(struct mlx5_core_dev *dev,
+ u32 *rx_pause, u32 *tx_pause)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (rx_pause)
+ *rx_pause = MLX5_GET(pfcc_reg, out, pprx);
+
+ if (tx_pause)
+ *tx_pause = MLX5_GET(pfcc_reg, out, pptx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
+
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 stall_critical_watermark,
+ u16 stall_minor_watermark)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pptx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, pprx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, ppan_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, critical_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, minor_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, device_stall_critical_watermark,
+ stall_critical_watermark);
+ MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 *stall_critical_watermark,
+ u16 *stall_minor_watermark)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (stall_critical_watermark)
+ *stall_critical_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_critical_watermark);
+
+ if (stall_minor_watermark)
+ *stall_minor_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_minor_watermark);
+
+ return 0;
+}
+
+int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx);
+ MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx);
+ MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx);
+ MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
+
+int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (pfc_en_tx)
+ *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx);
+
+ if (pfc_en_rx)
+ *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
+
+void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
+ u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+ *an_status = 0;
+ *an_disable_cap = 0;
+ *an_disable_admin = 0;
+
+ if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1))
+ return;
+
+ *an_status = MLX5_GET(ptys_reg, out, an_status);
+ *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+ *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg);
+
+int mlx5_max_tc(struct mlx5_core_dev *mdev)
+{
+ u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
+
+ return num_tc - 1;
+}
+
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0};
+
+ MLX5_SET(dcbx_param, in, port_number, 1);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0);
+}
+
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in)
+{
+ u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+ MLX5_SET(dcbx_param, in, port_number, 1);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(out), out,
+ sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1);
+}
+
+int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
+{
+ u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+ int err;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ if (prio_tc[i] > mlx5_max_tc(mdev))
+ return -EINVAL;
+
+ MLX5_SET(qtct_reg, in, prio, i);
+ MLX5_SET(qtct_reg, in, tclass, prio_tc[i]);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QTCT, 0, 1);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
+
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+ u8 prio, u8 *tc)
+{
+ u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+ u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(qtct_reg, in, port_number, 1);
+ MLX5_SET(qtct_reg, in, prio, prio);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QTCT, 0, 0);
+ if (!err)
+ *tc = MLX5_GET(qtct_reg, out, tclass);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
+
+static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out),
+ MLX5_REG_QETCR, 0, 1);
+}
+
+static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ return -EOPNOTSUPP;
+
+ memset(in, 0, sizeof(in));
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen,
+ MLX5_REG_QETCR, 0, 0);
+}
+
+int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ int i;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1);
+ MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group);
+
+int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev,
+ u8 tc, u8 *tc_group)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+ tc_configuration[tc]);
+
+ *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ group);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group);
+
+int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ int i;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1);
+ MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc);
+
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+ u8 tc, u8 *bw_pct)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+ tc_configuration[tc]);
+
+ *bw_pct = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ bw_allocation);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc);
+
+int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+ u8 *max_bw_value,
+ u8 *max_bw_units)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ void *ets_tcn_conf;
+ int i;
+
+ MLX5_SET(qetc_reg, in, port_number, 1);
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]);
+
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1);
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units,
+ max_bw_units[i]);
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value,
+ max_bw_value[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit);
+
+int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+ u8 *max_bw_value,
+ u8 *max_bw_units)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+ int i;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]);
+
+ max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ max_bw_value);
+ max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ max_bw_units);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit);
+
+int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
+{
+ u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0};
+
+ MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
+ MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
+ MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
+
+int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
+{
+ u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0};
+ int err;
+
+ MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
+
+static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ outlen, MLX5_REG_PCMR, 0, 0);
+}
+
+static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+
+ return mlx5_core_access_reg(mdev, in, inlen, out,
+ sizeof(out), MLX5_REG_PCMR, 0, 1);
+}
+
+int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, fcs_chk, enable);
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
+ bool *enabled)
+{
+ u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+ /* Default values for FW which do not support MLX5_REG_PCMR */
+ *supported = false;
+ *enabled = true;
+
+ if (!MLX5_CAP_GEN(mdev, ports_check))
+ return;
+
+ if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+ return;
+
+ *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap));
+ *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
+}
+
+static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
+ "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
+ "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
+ "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
+};
+
+static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
+ "Power budget exceeded",
+ "Long Range for non MLNX cable",
+ "Bus stuck(I2C or data shorted)",
+ "No EEPROM/retry timeout",
+ "Enforce part number list",
+ "Unknown identifier",
+ "High Temperature",
+ "Bad or shorted cable/module",
+ "Unknown status",
+};
+
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+ enum port_module_event_status_type module_status;
+ enum port_module_event_error_type error_type;
+ struct mlx5_eqe_port_module *module_event_eqe;
+ struct mlx5_priv *priv = &dev->priv;
+ u8 module_num;
+
+ module_event_eqe = &eqe->data.port_module;
+ module_num = module_event_eqe->module;
+ module_status = module_event_eqe->module_status &
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ error_type = module_event_eqe->error_type &
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+ if (module_status < MLX5_MODULE_STATUS_ERROR) {
+ priv->pme_stats.status_counters[module_status - 1]++;
+ } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
+ if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
+ /* Unknown error type */
+ error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
+ priv->pme_stats.error_counters[error_type]++;
+ }
+
+ if (!printk_ratelimit())
+ return;
+
+ if (module_status < MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_info(dev,
+ "Port module event: module %u, %s\n",
+ module_num, mlx5_pme_status[module_status - 1]);
+
+ else if (module_status == MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_info(dev,
+ "Port module event[error]: module %u, %s, %s\n",
+ module_num, mlx5_pme_status[module_status - 1],
+ mlx5_pme_error[error_type]);
+}
+
+int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
+{
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps,
+ mtpps_size, MLX5_REG_MTPPS, 0, 0);
+}
+
+int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out,
+ sizeof(out), MLX5_REG_MTPPS, 0, 1);
+}
+
+int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode)
+{
+ u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ int err = 0;
+
+ MLX5_SET(mtppse_reg, in, pin, pin);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MTPPSE, 0, 0);
+ if (err)
+ return err;
+
+ *arm = MLX5_GET(mtppse_reg, in, event_arm);
+ *mode = MLX5_GET(mtppse_reg, in, event_generation_mode);
+
+ return err;
+}
+
+int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode)
+{
+ u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+
+ MLX5_SET(mtppse_reg, in, pin, pin);
+ MLX5_SET(mtppse_reg, in, event_arm, arm);
+ MLX5_SET(mtppse_reg, in, event_generation_mode, mode);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MTPPSE, 0, 1);
+}
+
+int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state)
+{
+ u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ int err;
+
+ MLX5_SET(qpts_reg, in, local_port, 1);
+ MLX5_SET(qpts_reg, in, trust_state, trust_state);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QPTS, 0, 1);
+ return err;
+}
+
+int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state)
+{
+ u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ int err;
+
+ MLX5_SET(qpts_reg, in, local_port, 1);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QPTS, 0, 0);
+ if (!err)
+ *trust_state = MLX5_GET(qpts_reg, out, trust_state);
+
+ return err;
+}
+
+int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio)
+{
+ int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+ void *qpdpm_dscp;
+ void *out;
+ void *in;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+ if (err)
+ goto out;
+
+ memcpy(in, out, sz);
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+
+ /* Update the corresponding dscp entry */
+ qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]);
+ MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio);
+ MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+/* dscp2prio[i]: priority that dscp i mapped to */
+#define MLX5E_SUPPORTED_DSCP 64
+int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio)
+{
+ int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+ void *qpdpm_dscp;
+ void *out;
+ void *in;
+ int err;
+ int i;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+ if (err)
+ goto out;
+
+ for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) {
+ qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]);
+ dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio);
+ }
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
new file mode 100644
index 000000000..479ac21cd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/gfp.h>
+#include <linux/export.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
+
+#include "mlx5_core.h"
+
+static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
+ u32 rsn)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ struct mlx5_core_rsc_common *common;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+
+ common = radix_tree_lookup(&table->tree, rsn);
+ if (common)
+ atomic_inc(&common->refcount);
+
+ spin_unlock_irqrestore(&table->lock, flags);
+
+ if (!common) {
+ mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
+ rsn);
+ return NULL;
+ }
+ return common;
+}
+
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
+{
+ if (atomic_dec_and_test(&common->refcount))
+ complete(&common->free);
+}
+
+static u64 qp_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+ BIT(MLX5_EVENT_TYPE_COMM_EST) |
+ BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+ BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+ BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+ BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+ BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+ return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+ return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static u64 dct_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+ switch (rsc_type) {
+ case MLX5_EVENT_QUEUE_TYPE_QP:
+ return BIT(event_type) & qp_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_RQ:
+ return BIT(event_type) & rq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_SQ:
+ return BIT(event_type) & sq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_DCT:
+ return BIT(event_type) & dct_allowed_event_types();
+ default:
+ WARN(1, "Event arrived for unknown resource type");
+ return false;
+ }
+}
+
+void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+{
+ struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+ struct mlx5_core_dct *dct;
+ struct mlx5_core_qp *qp;
+
+ if (!common)
+ return;
+
+ if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+ mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+ event_type, rsn);
+ goto out;
+ }
+
+ switch (common->res) {
+ case MLX5_RES_QP:
+ case MLX5_RES_RQ:
+ case MLX5_RES_SQ:
+ qp = (struct mlx5_core_qp *)common;
+ qp->event(qp, event_type);
+ break;
+ case MLX5_RES_DCT:
+ dct = (struct mlx5_core_dct *)common;
+ if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
+ complete(&dct->drained);
+ break;
+ default:
+ mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
+ }
+out:
+ mlx5_core_put_rsc(common);
+}
+
+static int create_resource_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp,
+ int rsc_type)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ int err;
+
+ qp->common.res = rsc_type;
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree,
+ qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+ qp);
+ spin_unlock_irq(&table->lock);
+ if (err)
+ return err;
+
+ atomic_set(&qp->common.refcount, 1);
+ init_completion(&qp->common.free);
+ qp->pid = current->pid;
+
+ return 0;
+}
+
+static void destroy_resource_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ radix_tree_delete(&table->tree,
+ qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+ spin_unlock_irqrestore(&table->lock, flags);
+ mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+ wait_for_completion(&qp->common.free);
+}
+
+int mlx5_core_create_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
+ u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+ u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ init_completion(&dct->drained);
+ MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
+
+ err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+ if (err) {
+ mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
+ return err;
+ }
+
+ qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+ err = create_resource_common(dev, qp, MLX5_RES_DCT);
+ if (err)
+ goto err_cmd;
+
+ return 0;
+err_cmd:
+ MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
+ mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
+ (void *)&out, sizeof(dout));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
+
+int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0};
+ u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)];
+ u32 din[MLX5_ST_SZ_DW(destroy_qp_in)];
+ int err;
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err)
+ return err;
+
+ qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+ mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
+
+ err = create_resource_common(dev, qp, MLX5_RES_QP);
+ if (err)
+ goto err_cmd;
+
+ err = mlx5_debug_qp_add(dev, qp);
+ if (err)
+ mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
+ qp->qpn);
+
+ atomic_inc(&dev->num_qps);
+
+ return 0;
+
+err_cmd:
+ memset(din, 0, sizeof(din));
+ memset(dout, 0, sizeof(dout));
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+ mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
+
+static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
+ MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+ return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)&out, sizeof(out));
+}
+
+int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ err = mlx5_core_drain_dct(dev, dct);
+ if (err) {
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ goto destroy;
+ } else {
+ mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err);
+ return err;
+ }
+ }
+ wait_for_completion(&dct->drained);
+destroy:
+ destroy_resource_common(dev, &dct->mqp);
+ MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)&out, sizeof(out));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
+
+int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {0};
+ int err;
+
+ mlx5_debug_qp_remove(dev, qp);
+
+ destroy_resource_common(dev, qp);
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ atomic_dec(&dev->num_qps);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
+
+int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
+ u32 timeout_usec)
+{
+ u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {0};
+
+ MLX5_SET(set_delay_drop_params_in, in, opcode,
+ MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
+ MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout,
+ timeout_usec / 100);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop);
+
+struct mbox_info {
+ u32 *in;
+ u32 *out;
+ int inlen;
+ int outlen;
+};
+
+static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
+{
+ mbox->inlen = inlen;
+ mbox->outlen = outlen;
+ mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
+ mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
+ if (!mbox->in || !mbox->out) {
+ kfree(mbox->in);
+ kfree(mbox->out);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mbox_free(struct mbox_info *mbox)
+{
+ kfree(mbox->in);
+ kfree(mbox->out);
+}
+
+static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
+ u32 opt_param_mask, void *qpc,
+ struct mbox_info *mbox)
+{
+ mbox->out = NULL;
+ mbox->in = NULL;
+
+#define MBOX_ALLOC(mbox, typ) \
+ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
+
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
+ MLX5_SET(typ##_in, in, opcode, _opcode); \
+ MLX5_SET(typ##_in, in, qpn, _qpn)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
+ MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
+ MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+ memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+
+ switch (opcode) {
+ /* 2RST & 2ERR */
+ case MLX5_CMD_OP_2RST_QP:
+ if (MBOX_ALLOC(mbox, qp_2rst))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ if (MBOX_ALLOC(mbox, qp_2err))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+ break;
+
+ /* MODIFY with QPC */
+ case MLX5_CMD_OP_RST2INIT_QP:
+ if (MBOX_ALLOC(mbox, rst2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ if (MBOX_ALLOC(mbox, init2rtr_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ if (MBOX_ALLOC(mbox, rtr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ if (MBOX_ALLOC(mbox, rts2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ if (MBOX_ALLOC(mbox, sqerr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ if (MBOX_ALLOC(mbox, init2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ default:
+ mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
+ opcode, qpn);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
+ u32 opt_param_mask, void *qpc,
+ struct mlx5_core_qp *qp)
+{
+ struct mbox_info mbox;
+ int err;
+
+ err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
+ opt_param_mask, qpc, &mbox);
+ if (err)
+ return err;
+
+ err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen);
+ mbox_free(&mbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
+
+void mlx5_init_qp_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ memset(table, 0, sizeof(*table));
+ spin_lock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ mlx5_qp_debugfs_init(dev);
+}
+
+void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
+{
+ mlx5_qp_debugfs_cleanup(dev);
+}
+
+int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0};
+
+ MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+ MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
+
+int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
+ MLX5_SET(query_dct_in, in, dctn, qp->qpn);
+
+ return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)out, outlen);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_dct_query);
+
+int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {0};
+ int err;
+
+ MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
+
+int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {0};
+
+ MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
+
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq)
+{
+ int err;
+ u32 rqn;
+
+ err = mlx5_core_create_rq(dev, in, inlen, &rqn);
+ if (err)
+ return err;
+
+ rq->qpn = rqn;
+ err = create_resource_common(dev, rq, MLX5_RES_RQ);
+ if (err)
+ goto err_destroy_rq;
+
+ return 0;
+
+err_destroy_rq:
+ mlx5_core_destroy_rq(dev, rq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *rq)
+{
+ destroy_resource_common(dev, rq);
+ mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq)
+{
+ int err;
+ u32 sqn;
+
+ err = mlx5_core_create_sq(dev, in, inlen, &sqn);
+ if (err)
+ return err;
+
+ sq->qpn = sqn;
+ err = create_resource_common(dev, sq, MLX5_RES_SQ);
+ if (err)
+ goto err_destroy_sq;
+
+ return 0;
+
+err_destroy_sq:
+ mlx5_core_destroy_sq(dev, sq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *sq)
+{
+ destroy_resource_common(dev, sq);
+ mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
+
+int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *counter_id = MLX5_GET(alloc_q_counter_out, out,
+ counter_set_id);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter);
+
+int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0};
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter);
+
+int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
+ int reset, void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0};
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, clear, reset);
+ MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
new file mode 100644
index 000000000..bc86dffdc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/* Scheduling element fw management */
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *ctx, u32 *element_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
+ void *schedc;
+ int err;
+
+ schedc = MLX5_ADDR_OF(create_scheduling_element_in, in,
+ scheduling_context);
+ MLX5_SET(create_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT);
+ MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+ memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *element_id = MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ return 0;
+}
+
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *ctx, u32 element_id,
+ u32 modify_bitmask)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+ void *schedc;
+
+ schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in,
+ scheduling_context);
+ MLX5_SET(modify_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT);
+ MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id,
+ element_id);
+ MLX5_SET(modify_scheduling_element_in, in, modify_bitmask,
+ modify_bitmask);
+ MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+ memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ u32 element_id)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+
+ MLX5_SET(destroy_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+ MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id,
+ element_id);
+ MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/* Finds an entry where we can register the given rate
+ * If the rate already exists, return the entry where it is registered,
+ * otherwise return the first available entry.
+ * If the table is full, return NULL
+ */
+static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
+ struct mlx5_rate_limit *rl)
+{
+ struct mlx5_rl_entry *ret_entry = NULL;
+ bool empty_found = false;
+ int i;
+
+ for (i = 0; i < table->max_size; i++) {
+ if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl))
+ return &table->rl_entry[i];
+ if (!empty_found && !table->rl_entry[i].rl.rate) {
+ empty_found = true;
+ ret_entry = &table->rl_entry[i];
+ }
+ }
+
+ return ret_entry;
+}
+
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
+ u16 index,
+ struct mlx5_rate_limit *rl)
+{
+ u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
+
+ MLX5_SET(set_pp_rate_limit_in, in, opcode,
+ MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate);
+ MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz);
+ MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+
+ return (rate <= table->max_rate && rate >= table->min_rate);
+}
+EXPORT_SYMBOL(mlx5_rl_is_in_range);
+
+bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
+ struct mlx5_rate_limit *rl_1)
+{
+ return ((rl_0->rate == rl_1->rate) &&
+ (rl_0->max_burst_sz == rl_1->max_burst_sz) &&
+ (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz));
+}
+EXPORT_SYMBOL(mlx5_rl_are_equal);
+
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+ struct mlx5_rate_limit *rl)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry;
+ int err = 0;
+
+ mutex_lock(&table->rl_lock);
+
+ if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) {
+ mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
+ rl->rate, table->min_rate, table->max_rate);
+ err = -EINVAL;
+ goto out;
+ }
+
+ entry = find_rl_entry(table, rl);
+ if (!entry) {
+ mlx5_core_err(dev, "Max number of %u rates reached\n",
+ table->max_size);
+ err = -ENOSPC;
+ goto out;
+ }
+ if (entry->refcount) {
+ /* rate already configured */
+ entry->refcount++;
+ } else {
+ /* new rate limit */
+ err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl);
+ if (err) {
+ mlx5_core_err(dev, "Failed configuring rate limit(err %d): \
+ rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, rl->rate, rl->max_burst_sz,
+ rl->typical_pkt_sz);
+ goto out;
+ }
+ entry->rl = *rl;
+ entry->refcount = 1;
+ }
+ *index = entry->index;
+
+out:
+ mutex_unlock(&table->rl_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_rl_add_rate);
+
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry = NULL;
+ struct mlx5_rate_limit reset_rl = {0};
+
+ /* 0 is a reserved value for unlimited rate */
+ if (rl->rate == 0)
+ return;
+
+ mutex_lock(&table->rl_lock);
+ entry = find_rl_entry(table, rl);
+ if (!entry || !entry->refcount) {
+ mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \
+ are not configured\n",
+ rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
+ goto out;
+ }
+
+ entry->refcount--;
+ if (!entry->refcount) {
+ /* need to remove rate */
+ mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl);
+ entry->rl = reset_rl;
+ }
+
+out:
+ mutex_unlock(&table->rl_lock);
+}
+EXPORT_SYMBOL(mlx5_rl_remove_rate);
+
+int mlx5_init_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ int i;
+
+ mutex_init(&table->rl_lock);
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
+ table->max_size = 0;
+ return 0;
+ }
+
+ /* First entry is reserved for unlimited rate */
+ table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1;
+ table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
+ table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
+
+ table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
+ GFP_KERNEL);
+ if (!table->rl_entry)
+ return -ENOMEM;
+
+ /* The index represents the index in HW rate limit table
+ * Index 0 is reserved for unlimited rate
+ */
+ for (i = 0; i < table->max_size; i++)
+ table->rl_entry[i].index = i + 1;
+
+ /* Index 0 is reserved */
+ mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n",
+ table->max_size,
+ table->min_rate >> 10,
+ table->max_rate >> 10);
+
+ return 0;
+}
+
+void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rate_limit rl = {0};
+ int i;
+
+ /* Clear all configured rates */
+ for (i = 0; i < table->max_size; i++)
+ if (table->rl_entry[i].rl.rate)
+ mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index,
+ &rl);
+
+ kfree(dev->priv.rl_table.rl_entry);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
new file mode 100644
index 000000000..a0674962f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ return !!sriov->num_vfs;
+}
+
+static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct mlx5_hca_vport_context *in;
+ int err = 0;
+
+ /* Restore sriov guid and policy settings */
+ if (sriov->vfs_ctx[vf].node_guid ||
+ sriov->vfs_ctx[vf].port_guid ||
+ sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) {
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->node_guid = sriov->vfs_ctx[vf].node_guid;
+ in->port_guid = sriov->vfs_ctx[vf].port_guid;
+ in->policy = sriov->vfs_ctx[vf].policy;
+ in->field_select =
+ !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID |
+ !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID |
+ !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY;
+
+ err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in);
+ if (err)
+ mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf);
+
+ kfree(in);
+ }
+
+ return err;
+}
+
+static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+ int vf;
+
+ if (sriov->enabled_vfs) {
+ mlx5_core_warn(dev,
+ "failed to enable SRIOV on device, already enabled with %d vfs\n",
+ sriov->enabled_vfs);
+ return -EBUSY;
+ }
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ goto enable_vfs_hca;
+
+ err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to enable eswitch SRIOV (%d)\n", err);
+ return err;
+ }
+
+enable_vfs_hca:
+ for (vf = 0; vf < num_vfs; vf++) {
+ err = mlx5_core_enable_hca(dev, vf + 1);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
+ continue;
+ }
+ sriov->vfs_ctx[vf].enabled = 1;
+ sriov->enabled_vfs++;
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
+ err = sriov_restore_guids(dev, vf);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to restore VF %d settings, err %d\n",
+ vf, err);
+ continue;
+ }
+ }
+ mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
+ }
+
+ return 0;
+}
+
+static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+ int vf;
+
+ if (!sriov->enabled_vfs)
+ goto out;
+
+ for (vf = 0; vf < sriov->num_vfs; vf++) {
+ if (!sriov->vfs_ctx[vf].enabled)
+ continue;
+ err = mlx5_core_disable_hca(dev, vf + 1);
+ if (err) {
+ mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
+ continue;
+ }
+ sriov->vfs_ctx[vf].enabled = 0;
+ sriov->enabled_vfs--;
+ }
+
+out:
+ if (MLX5_ESWITCH_MANAGER(dev))
+ mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+
+ if (mlx5_wait_for_vf_pages(dev))
+ mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
+}
+
+static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err = 0;
+
+ if (pci_num_vf(pdev)) {
+ mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n");
+ return -EBUSY;
+ }
+
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err)
+ mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
+
+ return err;
+}
+
+static void mlx5_pci_disable_sriov(struct pci_dev *pdev)
+{
+ pci_disable_sriov(pdev);
+}
+
+static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err = 0;
+
+ err = mlx5_device_enable_sriov(dev, num_vfs);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
+ return err;
+ }
+
+ err = mlx5_pci_enable_sriov(pdev, num_vfs);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err);
+ mlx5_device_disable_sriov(dev);
+ return err;
+ }
+
+ sriov->num_vfs = num_vfs;
+
+ return 0;
+}
+
+static void mlx5_sriov_disable(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ mlx5_pci_disable_sriov(pdev);
+ mlx5_device_disable_sriov(dev);
+ sriov->num_vfs = 0;
+}
+
+int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err = 0;
+
+ mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
+ if (!mlx5_core_is_pf(dev))
+ return -EPERM;
+
+ if (num_vfs) {
+ int ret;
+
+ ret = mlx5_lag_forbid(dev);
+ if (ret && (ret != -ENODEV))
+ return ret;
+ }
+
+ if (num_vfs) {
+ err = mlx5_sriov_enable(pdev, num_vfs);
+ } else {
+ mlx5_sriov_disable(pdev);
+ mlx5_lag_allow(dev);
+ }
+
+ return err ? err : num_vfs;
+}
+
+int mlx5_sriov_attach(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+ return 0;
+
+ /* If sriov VFs exist in PCI level, enable them in device level */
+ return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+}
+
+void mlx5_sriov_detach(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ mlx5_device_disable_sriov(dev);
+}
+
+int mlx5_sriov_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct pci_dev *pdev = dev->pdev;
+ int total_vfs;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ total_vfs = pci_sriov_get_totalvfs(pdev);
+ sriov->num_vfs = pci_num_vf(pdev);
+ sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+ if (!sriov->vfs_ctx)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ kfree(sriov->vfs_ctx);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
new file mode 100644
index 000000000..23cc337a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -0,0 +1,692 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/srq.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+#include <linux/mlx5/transobj.h>
+
+void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_core_srq *srq;
+
+ spin_lock(&table->lock);
+
+ srq = radix_tree_lookup(&table->tree, srqn);
+ if (srq)
+ atomic_inc(&srq->refcount);
+
+ spin_unlock(&table->lock);
+
+ if (!srq) {
+ mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+
+static int get_pas_size(struct mlx5_srq_attr *in)
+{
+ u32 log_page_size = in->log_page_size + 12;
+ u32 log_srq_size = in->log_size;
+ u32 log_rq_stride = in->wqe_shift;
+ u32 page_offset = in->page_offset;
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static void set_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(wq, wq, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
+ MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
+ MLX5_SET(wq, wq, log_wq_sz, in->log_size);
+ MLX5_SET(wq, wq, page_offset, in->page_offset);
+ MLX5_SET(wq, wq, lwm, in->lwm);
+ MLX5_SET(wq, wq, pd, in->pd);
+ MLX5_SET64(wq, wq, dbr_addr, in->db_record);
+}
+
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
+ MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
+ MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
+ MLX5_SET(srqc, srqc, page_offset, in->page_offset);
+ MLX5_SET(srqc, srqc, lwm, in->lwm);
+ MLX5_SET(srqc, srqc, pd, in->pd);
+ MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
+ MLX5_SET(srqc, srqc, xrcd, in->xrcd);
+ MLX5_SET(srqc, srqc, cqn, in->cqn);
+}
+
+static void get_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(wq, wq, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
+ in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
+ in->log_size = MLX5_GET(wq, wq, log_wq_sz);
+ in->page_offset = MLX5_GET(wq, wq, page_offset);
+ in->lwm = MLX5_GET(wq, wq, lwm);
+ in->pd = MLX5_GET(wq, wq, pd);
+ in->db_record = MLX5_GET64(wq, wq, dbr_addr);
+}
+
+static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(srqc, srqc, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
+ in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
+ in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
+ in->page_offset = MLX5_GET(srqc, srqc, page_offset);
+ in->lwm = MLX5_GET(srqc, srqc, lwm);
+ in->pd = MLX5_GET(srqc, srqc, pd);
+ in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
+}
+
+struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_core_srq *srq;
+
+ spin_lock(&table->lock);
+
+ srq = radix_tree_lookup(&table->tree, srqn);
+ if (srq)
+ atomic_inc(&srq->refcount);
+
+ spin_unlock(&table->lock);
+
+ return srq;
+}
+EXPORT_SYMBOL(mlx5_core_get_srq);
+
+static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
+ void *create_in;
+ void *srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
+ pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
+
+ set_srqc(srqc, in);
+ memcpy(pas, in->pas, pas_size);
+
+ MLX5_SET(create_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_SRQ);
+
+ err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err)
+ srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+
+ return err;
+}
+
+static int destroy_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
+
+ MLX5_SET(destroy_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
+
+ return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+ srq_out, sizeof(srq_out));
+}
+
+static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+
+ MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
+ MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
+
+ return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+ srq_out, sizeof(srq_out));
+}
+
+static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
+ u32 *srq_out;
+ void *srqc;
+ int err;
+
+ srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
+ if (!srq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_QUERY_SRQ);
+ MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
+ err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+ srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
+ if (err)
+ goto out;
+
+ srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
+ get_srqc(srqc, out);
+ if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+out:
+ kvfree(srq_out);
+ return err;
+}
+
+static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+ void *create_in;
+ void *xrc_srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
+ xrc_srq_context_entry);
+ pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
+
+ set_srqc(xrc_srqc, in);
+ MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
+ memcpy(pas, in->pas, pas_size);
+ MLX5_SET(create_xrc_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_XRC_SRQ);
+
+ memset(create_out, 0, sizeof(create_out));
+ err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ sizeof(create_out));
+ if (err)
+ goto out;
+
+ srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+out:
+ kvfree(create_in);
+ return err;
+}
+
+static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
+ u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
+
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
+ MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+
+ return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq, u16 lwm)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
+ u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
+
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
+
+ return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
+ u32 *xrcsrq_out;
+ void *xrc_srqc;
+ int err;
+
+ xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
+ if (!xrcsrq_out)
+ return -ENOMEM;
+ memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
+
+ MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
+ MLX5_CMD_OP_QUERY_XRC_SRQ);
+ MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+
+ err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
+ MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+ if (err)
+ goto out;
+
+ xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
+ xrc_srq_context_entry);
+ get_srqc(xrc_srqc, out);
+ if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(xrcsrq_out);
+ return err;
+}
+
+static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ void *create_in;
+ void *rmpc;
+ void *wq;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ set_wq(wq, in);
+ memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
+
+ err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
+
+ kvfree(create_in);
+ return err;
+}
+
+static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ return mlx5_core_destroy_rmp(dev, srq->srqn);
+}
+
+static int arm_rmp_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ void *in;
+ void *rmpc;
+ void *wq;
+ void *bitmask;
+ int err;
+
+ in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
+ bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(wq, wq, lwm, lwm);
+ MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+
+ err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
+
+ kvfree(in);
+ return err;
+}
+
+static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 *rmp_out;
+ void *rmpc;
+ int err;
+
+ rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
+ if (!rmp_out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
+ if (err)
+ goto out;
+
+ rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
+ get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
+ if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(rmp_out);
+ return err;
+}
+
+static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
+ void *create_in;
+ void *xrqc;
+ void *wq;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+
+ set_wq(wq, in);
+ memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
+
+ if (in->type == IB_SRQT_TM) {
+ MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
+ if (in->flags & MLX5_SRQ_FLAG_RNDV)
+ MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
+ MLX5_SET(xrqc, xrqc,
+ tag_matching_topology_context.log_matching_list_sz,
+ in->tm_log_list_size);
+ }
+ MLX5_SET(xrqc, xrqc, user_index, in->user_index);
+ MLX5_SET(xrqc, xrqc, cqn, in->cqn);
+ MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+ err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err)
+ srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+
+ return err;
+}
+
+static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
+
+ MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int arm_xrq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+
+ MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
+ MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, in, lwm, lwm);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
+ u32 *xrq_out;
+ int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
+ void *xrqc;
+ int err;
+
+ xrq_out = kvzalloc(outlen, GFP_KERNEL);
+ if (!xrq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
+ MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
+ if (err)
+ goto out;
+
+ xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
+ get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
+ if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+ out->tm_next_tag =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.append_next_index);
+ out->tm_hw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.hw_phase_cnt);
+ out->tm_sw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.sw_phase_cnt);
+
+out:
+ kvfree(xrq_out);
+ return err;
+}
+
+static int create_srq_split(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ if (!dev->issi)
+ return create_srq_cmd(dev, srq, in);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return create_xrc_srq_cmd(dev, srq, in);
+ case MLX5_RES_XRQ:
+ return create_xrq_cmd(dev, srq, in);
+ default:
+ return create_rmp_cmd(dev, srq, in);
+ }
+}
+
+static int destroy_srq_split(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ if (!dev->issi)
+ return destroy_srq_cmd(dev, srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return destroy_xrc_srq_cmd(dev, srq);
+ case MLX5_RES_XRQ:
+ return destroy_xrq_cmd(dev, srq);
+ default:
+ return destroy_rmp_cmd(dev, srq);
+ }
+}
+
+int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ int err;
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+
+ switch (in->type) {
+ case IB_SRQT_XRC:
+ srq->common.res = MLX5_RES_XSRQ;
+ break;
+ case IB_SRQT_TM:
+ srq->common.res = MLX5_RES_XRQ;
+ break;
+ default:
+ srq->common.res = MLX5_RES_SRQ;
+ }
+
+ err = create_srq_split(dev, srq, in);
+ if (err)
+ return err;
+
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, srq->srqn, srq);
+ spin_unlock_irq(&table->lock);
+ if (err) {
+ mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
+ goto err_destroy_srq_split;
+ }
+
+ return 0;
+
+err_destroy_srq_split:
+ destroy_srq_split(dev, srq);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_srq);
+
+int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_core_srq *tmp;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, srq->srqn);
+ spin_unlock_irq(&table->lock);
+ if (!tmp) {
+ mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
+ return -EINVAL;
+ }
+ if (tmp != srq) {
+ mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
+ return -EINVAL;
+ }
+
+ err = destroy_srq_split(dev, srq);
+ if (err)
+ return err;
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_srq);
+
+int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ if (!dev->issi)
+ return query_srq_cmd(dev, srq, out);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return query_xrc_srq_cmd(dev, srq, out);
+ case MLX5_RES_XRQ:
+ return query_xrq_cmd(dev, srq, out);
+ default:
+ return query_rmp_cmd(dev, srq, out);
+ }
+}
+EXPORT_SYMBOL(mlx5_core_query_srq);
+
+int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ if (!dev->issi)
+ return arm_srq_cmd(dev, srq, lwm, is_srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return arm_xrc_srq_cmd(dev, srq, lwm);
+ case MLX5_RES_XRQ:
+ return arm_xrq_cmd(dev, srq, lwm);
+ default:
+ return arm_rmp_cmd(dev, srq, lwm);
+ }
+}
+EXPORT_SYMBOL(mlx5_core_arm_srq);
+
+void mlx5_init_srq_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+
+ memset(table, 0, sizeof(*table));
+ spin_lock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+}
+
+void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
+{
+ /* nothing */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
new file mode 100644
index 000000000..a1ee9a8a7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include <linux/mlx5/transobj.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *tdn = MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
+
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+
+ MLX5_SET(dealloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
+
+int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0};
+ int err;
+
+ MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rqn = MLX5_GET(create_rq_out, out, rqn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq);
+
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_rq_out)];
+
+ MLX5_SET(modify_rq_in, in, rqn, rqn);
+ MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_rq);
+
+void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq);
+
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+
+ MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+ MLX5_SET(query_rq_in, in, rqn, rqn);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
+int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0};
+ int err;
+
+ MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *sqn = MLX5_GET(create_sq_out, out, sqn);
+
+ return err;
+}
+
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0};
+
+ MLX5_SET(modify_sq_in, in, sqn, sqn);
+ MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_sq);
+
+void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0};
+
+ MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, in, sqn, sqn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+
+ MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+ MLX5_SET(query_sq_in, in, sqn, sqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
+int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = kvzalloc(inlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev, sqn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *state = MLX5_GET(sqc, sqc, state);
+
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
+
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *tirn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0};
+ int err;
+
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *tirn = MLX5_GET(create_tir_out, out, tirn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_tir);
+
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0};
+
+ MLX5_SET(modify_tir_in, in, tirn, tirn);
+ MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0};
+
+ MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, in, tirn, tirn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
+
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *tisn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0};
+ int err;
+
+ MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *tisn = MLX5_GET(create_tis_out, out, tisn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+ MLX5_SET(modify_tis_in, in, tisn, tisn);
+ MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
+
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
+
+int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rmpn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
+ int err;
+
+ MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
+
+ return err;
+}
+
+int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
+
+ MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
+
+ MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out,
+ sizeof(out));
+}
+
+int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+
+ MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
+ MLX5_SET(query_rmp_in, in, rmpn, rmpn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
+{
+ void *in;
+ void *rmpc;
+ void *wq;
+ void *bitmask;
+ int err;
+
+ in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
+ bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, rmpn, rmpn);
+ MLX5_SET(wq, wq, lwm, lwm);
+ MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+
+ err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *xsrqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
+ int err;
+
+ MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
+
+ return err;
+}
+
+int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
+
+ MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
+
+ MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
+ MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
+ MLX5_SET(arm_xrc_srq_in, in, op_mod,
+ MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rqtn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
+ int err;
+
+ MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rqtn = MLX5_GET(create_rqt_out, out, rqtn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rqt);
+
+int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0};
+
+ MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
+ MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rqt);
+
+static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev,
+ struct mlx5_hairpin_params *params, u32 *rqn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_rq_in)] = {0};
+ void *rqc, *wq;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(rqc, rqc, hairpin, 1);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, counter_set_id, params->q_counter);
+
+ MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size);
+ MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets);
+
+ return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn);
+}
+
+static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev,
+ struct mlx5_hairpin_params *params, u32 *sqn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_sq_in)] = {0};
+ void *sqc, *wq;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(sqc, sqc, hairpin, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+
+ MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size);
+ MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets);
+
+ return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn);
+}
+
+static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp,
+ struct mlx5_hairpin_params *params)
+{
+ int i, j, err;
+
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]);
+ if (err)
+ goto out_err_rq;
+ }
+
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]);
+ if (err)
+ goto out_err_sq;
+ }
+
+ return 0;
+
+out_err_sq:
+ for (j = 0; j < i; j++)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]);
+ i = hp->num_channels;
+out_err_rq:
+ for (j = 0; j < i; j++)
+ mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]);
+ return err;
+}
+
+static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ for (i = 0; i < hp->num_channels; i++) {
+ mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]);
+ if (!hp->peer_gone)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+ }
+}
+
+static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn,
+ int curr_state, int next_state,
+ u16 peer_vhca, u32 peer_sq)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {0};
+ void *rqc;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ if (next_state == MLX5_RQC_STATE_RDY) {
+ MLX5_SET(rqc, rqc, hairpin_peer_sq, peer_sq);
+ MLX5_SET(rqc, rqc, hairpin_peer_vhca, peer_vhca);
+ }
+
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ return mlx5_core_modify_rq(func_mdev, rqn,
+ in, MLX5_ST_SZ_BYTES(modify_rq_in));
+}
+
+static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn,
+ int curr_state, int next_state,
+ u16 peer_vhca, u32 peer_rq)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_sq_in)] = {0};
+ void *sqc;
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ if (next_state == MLX5_SQC_STATE_RDY) {
+ MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq);
+ MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca);
+ }
+
+ MLX5_SET(modify_sq_in, in, sq_state, curr_state);
+ MLX5_SET(sqc, sqc, state, next_state);
+
+ return mlx5_core_modify_sq(peer_mdev, sqn,
+ in, MLX5_ST_SZ_BYTES(modify_sq_in));
+}
+
+static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp)
+{
+ int i, j, err;
+
+ /* set peer SQs */
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i],
+ MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY,
+ MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]);
+ if (err)
+ goto err_modify_sq;
+ }
+
+ /* set func RQs */
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i],
+ MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY,
+ MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]);
+ if (err)
+ goto err_modify_rq;
+ }
+
+ return 0;
+
+err_modify_rq:
+ for (j = 0; j < i; j++)
+ mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY,
+ MLX5_RQC_STATE_RST, 0, 0);
+ i = hp->num_channels;
+err_modify_sq:
+ for (j = 0; j < i; j++)
+ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_RST, 0, 0);
+ return err;
+}
+
+static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ /* unset func RQs */
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
+ MLX5_RQC_STATE_RST, 0, 0);
+
+ /* unset peer SQs */
+ if (hp->peer_gone)
+ return;
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_RST, 0, 0);
+}
+
+struct mlx5_hairpin *
+mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
+ struct mlx5_core_dev *peer_mdev,
+ struct mlx5_hairpin_params *params)
+{
+ struct mlx5_hairpin *hp;
+ int size, err;
+
+ size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32);
+ hp = kzalloc(size, GFP_KERNEL);
+ if (!hp)
+ return ERR_PTR(-ENOMEM);
+
+ hp->func_mdev = func_mdev;
+ hp->peer_mdev = peer_mdev;
+ hp->num_channels = params->num_channels;
+
+ hp->rqn = (void *)hp + sizeof(*hp);
+ hp->sqn = hp->rqn + params->num_channels;
+
+ /* alloc and pair func --> peer hairpin */
+ err = mlx5_hairpin_create_queues(hp, params);
+ if (err)
+ goto err_create_queues;
+
+ err = mlx5_hairpin_pair_queues(hp);
+ if (err)
+ goto err_pair_queues;
+
+ return hp;
+
+err_pair_queues:
+ mlx5_hairpin_destroy_queues(hp);
+err_create_queues:
+ kfree(hp);
+ return ERR_PTR(err);
+}
+
+void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
+{
+ mlx5_hairpin_unpair_queues(hp);
+ mlx5_hairpin_destroy_queues(hp);
+ kfree(hp);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
new file mode 100644
index 000000000..8b97066dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0};
+ int err;
+
+ MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
+
+int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0};
+
+ MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+ MLX5_SET(dealloc_uar_in, in, uar, uarn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_free_uar);
+
+static int uars_per_sys_page(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, uar_4k))
+ return MLX5_CAP_GEN(mdev, num_of_uars_per_page);
+
+ return 1;
+}
+
+static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
+{
+ u32 system_page_index;
+
+ if (MLX5_CAP_GEN(mdev, uar_4k))
+ system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT);
+ else
+ system_page_index = index;
+
+ return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+}
+
+static void up_rel_func(struct kref *kref)
+{
+ struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
+
+ list_del(&up->list);
+ iounmap(up->map);
+ if (mlx5_cmd_free_uar(up->mdev, up->index))
+ mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
+ kfree(up->reg_bitmap);
+ kfree(up->fp_bitmap);
+ kfree(up);
+}
+
+static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
+ bool map_wc)
+{
+ struct mlx5_uars_page *up;
+ int err = -ENOMEM;
+ phys_addr_t pfn;
+ int bfregs;
+ int i;
+
+ bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
+ up = kzalloc(sizeof(*up), GFP_KERNEL);
+ if (!up)
+ return ERR_PTR(err);
+
+ up->mdev = mdev;
+ up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+ if (!up->reg_bitmap)
+ goto error1;
+
+ up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+ if (!up->fp_bitmap)
+ goto error1;
+
+ for (i = 0; i < bfregs; i++)
+ if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR)
+ set_bit(i, up->reg_bitmap);
+ else
+ set_bit(i, up->fp_bitmap);
+
+ up->bfregs = bfregs;
+ up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+ up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+
+ err = mlx5_cmd_alloc_uar(mdev, &up->index);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
+ goto error1;
+ }
+
+ pfn = uar2pfn(mdev, up->index);
+ if (map_wc) {
+ up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!up->map) {
+ err = -EAGAIN;
+ goto error2;
+ }
+ } else {
+ up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!up->map) {
+ err = -ENOMEM;
+ goto error2;
+ }
+ }
+ kref_init(&up->ref_count);
+ mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
+ up->index, up->bfregs);
+ return up;
+
+error2:
+ if (mlx5_cmd_free_uar(mdev, up->index))
+ mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
+error1:
+ kfree(up->fp_bitmap);
+ kfree(up->reg_bitmap);
+ kfree(up);
+ return ERR_PTR(err);
+}
+
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_uars_page *ret;
+
+ mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+ if (!list_empty(&mdev->priv.bfregs.reg_head.list)) {
+ ret = list_first_entry(&mdev->priv.bfregs.reg_head.list,
+ struct mlx5_uars_page, list);
+ kref_get(&ret->ref_count);
+ goto out;
+ }
+ ret = alloc_uars_page(mdev, false);
+ if (IS_ERR(ret))
+ goto out;
+ list_add(&ret->list, &mdev->priv.bfregs.reg_head.list);
+out:
+ mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_get_uars_page);
+
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up)
+{
+ mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+ kref_put(&up->ref_count, up_rel_func);
+ mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+}
+EXPORT_SYMBOL(mlx5_put_uars_page);
+
+static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
+{
+ /* return the offset in bytes from the start of the page to the
+ * blue flame area of the UAR
+ */
+ return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE +
+ (dbi % MLX5_BFREGS_PER_UAR) *
+ (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET;
+}
+
+static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+ bool map_wc, bool fast_path)
+{
+ struct mlx5_bfreg_data *bfregs;
+ struct mlx5_uars_page *up;
+ struct list_head *head;
+ unsigned long *bitmap;
+ unsigned int *avail;
+ struct mutex *lock; /* pointer to right mutex */
+ int dbi;
+
+ bfregs = &mdev->priv.bfregs;
+ if (map_wc) {
+ head = &bfregs->wc_head.list;
+ lock = &bfregs->wc_head.lock;
+ } else {
+ head = &bfregs->reg_head.list;
+ lock = &bfregs->reg_head.lock;
+ }
+ mutex_lock(lock);
+ if (list_empty(head)) {
+ up = alloc_uars_page(mdev, map_wc);
+ if (IS_ERR(up)) {
+ mutex_unlock(lock);
+ return PTR_ERR(up);
+ }
+ list_add(&up->list, head);
+ } else {
+ up = list_entry(head->next, struct mlx5_uars_page, list);
+ kref_get(&up->ref_count);
+ }
+ if (fast_path) {
+ bitmap = up->fp_bitmap;
+ avail = &up->fp_avail;
+ } else {
+ bitmap = up->reg_bitmap;
+ avail = &up->reg_avail;
+ }
+ dbi = find_first_bit(bitmap, up->bfregs);
+ clear_bit(dbi, bitmap);
+ (*avail)--;
+ if (!(*avail))
+ list_del(&up->list);
+
+ bfreg->map = up->map + map_offset(mdev, dbi);
+ bfreg->up = up;
+ bfreg->wc = map_wc;
+ bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR;
+ mutex_unlock(lock);
+
+ return 0;
+}
+
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+ bool map_wc, bool fast_path)
+{
+ int err;
+
+ err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);
+ if (!err)
+ return 0;
+
+ if (err == -EAGAIN && map_wc)
+ return alloc_bfreg(mdev, bfreg, false, fast_path);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_alloc_bfreg);
+
+static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
+ struct mlx5_uars_page *up,
+ struct mlx5_sq_bfreg *bfreg)
+{
+ unsigned int uar_idx;
+ unsigned int bfreg_idx;
+ unsigned int bf_reg_size;
+
+ bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
+
+ uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
+ bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size;
+
+ return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx;
+}
+
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
+{
+ struct mlx5_bfreg_data *bfregs;
+ struct mlx5_uars_page *up;
+ struct mutex *lock; /* pointer to right mutex */
+ unsigned int dbi;
+ bool fp;
+ unsigned int *avail;
+ unsigned long *bitmap;
+ struct list_head *head;
+
+ bfregs = &mdev->priv.bfregs;
+ if (bfreg->wc) {
+ head = &bfregs->wc_head.list;
+ lock = &bfregs->wc_head.lock;
+ } else {
+ head = &bfregs->reg_head.list;
+ lock = &bfregs->reg_head.lock;
+ }
+ up = bfreg->up;
+ dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
+ fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
+ if (fp) {
+ avail = &up->fp_avail;
+ bitmap = up->fp_bitmap;
+ } else {
+ avail = &up->reg_avail;
+ bitmap = up->reg_bitmap;
+ }
+ mutex_lock(lock);
+ (*avail)++;
+ set_bit(dbi, bitmap);
+ if (*avail == 1)
+ list_add_tail(&up->list, head);
+
+ kref_put(&up->ref_count, up_rel_func);
+ mutex_unlock(lock);
+}
+EXPORT_SYMBOL(mlx5_free_bfreg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
new file mode 100644
index 000000000..b02af317c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -0,0 +1,1203 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/export.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+
+/* Mutex to hold while enabling or disabling RoCE */
+static DEFINE_MUTEX(mlx5_roce_en_lock);
+
+static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0};
+
+ MLX5_SET(query_vport_state_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_STATE);
+ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
+
+ _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+
+ return MLX5_GET(query_vport_state_out, out, state);
+}
+
+int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u8 state)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
+
+ MLX5_SET(modify_vport_state_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_VPORT_STATE);
+ MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(modify_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+ MLX5_SET(modify_vport_state_in, in, admin_state, state);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *min_inline)
+{
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+ int err;
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+ if (!err)
+ *min_inline = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.min_wqe_inline_mode);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
+
+void mlx5_query_min_inline(struct mlx5_core_dev *mdev,
+ u8 *min_inline_mode)
+{
+ switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_L2:
+ *min_inline_mode = MLX5_INLINE_MODE_L2;
+ break;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
+ break;
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ *min_inline_mode = MLX5_INLINE_MODE_NONE;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(mlx5_query_min_inline);
+
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+ u16 vport, u8 min_inline)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *nic_vport_ctx;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.min_inline, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ min_wqe_inline_mode, min_inline);
+
+ return mlx5_modify_nic_vport_context(mdev, in, inlen);
+}
+
+int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *addr)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u8 *out_addr;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context.permanent_address);
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ if (!err)
+ ether_addr_copy(addr, &out_addr[2]);
+
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
+
+int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *addr)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+ void *nic_vport_ctx;
+ u8 *perm_mac;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.permanent_address, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx,
+ permanent_address);
+
+ ether_addr_copy(&perm_mac[2], addr);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
+
+int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ if (!err)
+ *mtu = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.mtu);
+
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu);
+
+int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu);
+
+int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ u32 vport,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int *list_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+ void *nic_vport_ctx;
+ int max_list_size;
+ int req_list_size;
+ int out_sz;
+ void *out;
+ int err;
+ int i;
+
+ req_list_size = *list_size;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (req_list_size > max_list_size) {
+ mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n",
+ req_list_size, max_list_size);
+ req_list_size = max_list_size;
+ }
+
+ out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
+
+ nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size);
+
+ *list_size = req_list_size;
+ for (i = 0; i < req_list_size; i++) {
+ u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(addr_list[i], mac_addr);
+ }
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
+
+int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+ memset(out, 0, sizeof(out));
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, list_type);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(curr_mac, addr_list[i]);
+ }
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
+
+int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u32 vport,
+ u16 vlans[],
+ int *size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+ void *nic_vport_ctx;
+ int req_list_size;
+ int max_list_size;
+ int out_sz;
+ void *out;
+ int err;
+ int i;
+
+ req_list_size = *size;
+ max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+ if (req_list_size > max_list_size) {
+ mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n",
+ req_list_size, max_list_size);
+ req_list_size = max_list_size;
+ }
+
+ out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ req_list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+ memset(in, 0, sizeof(in));
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, allowed_list_type,
+ MLX5_NVPRT_LIST_TYPE_VLAN);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
+
+ nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size);
+
+ *size = req_list_size;
+ for (i = 0; i < req_list_size; i++) {
+ void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]);
+ vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan);
+ }
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans);
+
+int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u16 vlans[],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+ memset(out, 0, sizeof(out));
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]);
+ MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]);
+ }
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
+
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+ u64 *system_image_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.system_image_guid);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.node_guid);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
+ u32 vport, u64 node_guid)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *nic_vport_context;
+ void *in;
+ int err;
+
+ if (!vport)
+ return -EINVAL;
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EACCES;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.node_guid, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport);
+
+ nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+ u16 *qkey_viol_cntr)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.qkey_violation_counter);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
+int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
+ u8 port_num, u16 vf_num, u16 gid_index,
+ union ib_gid *gid)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in);
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
+ int is_group_manager;
+ void *out = NULL;
+ void *in = NULL;
+ union ib_gid *tmp;
+ int tbsz;
+ int nout;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size));
+ mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n",
+ vf_num, gid_index, tbsz);
+
+ if (gid_index > tbsz && gid_index != 0xffff)
+ return -EINVAL;
+
+ if (gid_index == 0xffff)
+ nout = tbsz;
+ else
+ nout = 1;
+
+ out_sz += nout * sizeof(*gid);
+
+ in = kzalloc(in_sz, GFP_KERNEL);
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num);
+ MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1);
+ } else {
+ err = -EPERM;
+ goto out;
+ }
+ }
+ MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index);
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+ if (err)
+ goto out;
+
+ tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
+ gid->global.subnet_prefix = tmp->global.subnet_prefix;
+ gid->global.interface_id = tmp->global.interface_id;
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid);
+
+int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
+ u8 port_num, u16 vf_num, u16 pkey_index,
+ u16 *pkey)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in);
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out);
+ int is_group_manager;
+ void *out = NULL;
+ void *in = NULL;
+ void *pkarr;
+ int nout;
+ int tbsz;
+ int err;
+ int i;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+
+ tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size));
+ if (pkey_index > tbsz && pkey_index != 0xffff)
+ return -EINVAL;
+
+ if (pkey_index == 0xffff)
+ nout = tbsz;
+ else
+ nout = 1;
+
+ out_sz += nout * MLX5_ST_SZ_BYTES(pkey);
+
+ in = kzalloc(in_sz, GFP_KERNEL);
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num);
+ MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1);
+ } else {
+ err = -EPERM;
+ goto out;
+ }
+ }
+ MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index);
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+ if (err)
+ goto out;
+
+ pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey);
+ for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey))
+ *pkey = MLX5_GET_PR(pkey, pkarr, pkey);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey);
+
+int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ u16 vf_num,
+ struct mlx5_hca_vport_context *rep)
+{
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
+ int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0};
+ int is_group_manager;
+ void *out;
+ void *ctx;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT);
+
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_context_in, in, other_vport, 1);
+ MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num);
+ } else {
+ err = -EPERM;
+ goto ex;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_context_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto ex;
+
+ ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context);
+ rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select);
+ rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware);
+ rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi);
+ rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw);
+ rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy);
+ rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx,
+ port_physical_state);
+ rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state);
+ rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx,
+ port_physical_state);
+ rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid);
+ rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid);
+ rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1);
+ rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx,
+ cap_mask1_field_select);
+ rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2);
+ rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx,
+ cap_mask2_field_select);
+ rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid);
+ rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx,
+ init_type_reply);
+ rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc);
+ rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx,
+ subnet_timeout);
+ rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid);
+ rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl);
+ rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx,
+ qkey_violation_counter);
+ rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx,
+ pkey_violation_counter);
+ rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required);
+ rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx,
+ system_image_guid);
+
+ex:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context);
+
+int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
+ u64 *sys_image_guid)
+{
+ struct mlx5_hca_vport_context *rep;
+ int err;
+
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep);
+ if (!err)
+ *sys_image_guid = rep->sys_image_guid;
+
+ kfree(rep);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid);
+
+int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
+ u64 *node_guid)
+{
+ struct mlx5_hca_vport_context *rep;
+ int err;
+
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep);
+ if (!err)
+ *node_guid = rep->node_guid;
+
+ kfree(rep);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
+
+int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ u32 vport,
+ int *promisc_uc,
+ int *promisc_mc,
+ int *promisc_all)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ if (err)
+ goto out;
+
+ *promisc_uc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_uc);
+ *promisc_mc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_mc);
+ *promisc_all = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_all);
+
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
+
+int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ int promisc_uc,
+ int promisc_mc,
+ int promisc_all)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_uc, promisc_uc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_mc, promisc_mc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_all, promisc_all);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+enum {
+ UC_LOCAL_LB,
+ MC_LOCAL_LB
+};
+
+int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) &&
+ !MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+ return 0;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_mc_local_lb, !enable);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_uc_local_lb, !enable);
+
+ if (MLX5_CAP_GEN(mdev, disable_local_lb_mc))
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_mc_local_lb, 1);
+
+ if (MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_uc_local_lb, 1);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ if (!err)
+ mlx5_core_dbg(mdev, "%s local_lb\n",
+ enable ? "enable" : "disable");
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb);
+
+int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int value;
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ if (err)
+ goto out;
+
+ value = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB;
+
+ value |= MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB;
+
+ *status = !value;
+
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
+
+enum mlx5_vport_roce_state {
+ MLX5_VPORT_ROCE_DISABLED = 0,
+ MLX5_VPORT_ROCE_ENABLED = 1,
+};
+
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+ enum mlx5_vport_roce_state state)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+ state);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (!mdev->roce.roce_en)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+
+ if (!err)
+ mdev->roce.roce_en++;
+ mutex_unlock(&mlx5_roce_en_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (mdev->roce.roce_en) {
+ mdev->roce.roce_en--;
+ if (mdev->roce.roce_en == 0)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+
+ if (err)
+ mdev->roce.roce_en++;
+ }
+ mutex_unlock(&mlx5_roce_en_lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
+
+int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
+ int vf, u8 port_num, void *out,
+ size_t out_sz)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
+ int is_group_manager;
+ void *in;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1);
+ } else {
+ err = -EPERM;
+ goto free;
+ }
+ }
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_vport_counter_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+free:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
+
+int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
+ u64 *rx_discard_vport_down,
+ u64 *tx_discard_vport_down)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.receive_discard_vport_down);
+ *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.transmit_discard_vport_down);
+ return 0;
+}
+
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ int vf,
+ struct mlx5_hca_vport_context *req)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
+ u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)];
+ int is_group_manager;
+ void *in;
+ int err;
+ void *ctx;
+
+ mlx5_core_dbg(dev, "vf %d\n", vf);
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ memset(out, 0, sizeof(out));
+ MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf);
+ } else {
+ err = -EPERM;
+ goto ex;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev, num_ports) > 1)
+ MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num);
+
+ ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context);
+ MLX5_SET(hca_vport_context, ctx, field_select, req->field_select);
+ MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware);
+ MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi);
+ MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw);
+ MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy);
+ MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state);
+ MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state);
+ MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
+ MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm);
+ MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2);
+ MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm);
+ MLX5_SET(hca_vport_context, ctx, lid, req->lid);
+ MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply);
+ MLX5_SET(hca_vport_context, ctx, lmc, req->lmc);
+ MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout);
+ MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid);
+ MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl);
+ MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter);
+ MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter);
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ex:
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
+
+int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
+ struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5_nic_vport_enable_roce(port_mdev);
+ if (err)
+ goto free;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN(master_mdev, vhca_id));
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria,
+ MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
+
+ err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ if (err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+free:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport);
+
+int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id, 0);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria, 0);
+
+ err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ if (!err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
new file mode 100644
index 000000000..ddca327e8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "wq.h"
+#include "mlx5_core.h"
+
+u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
+{
+ return (u32)wq->fbc.sz_m1 + 1;
+}
+
+u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
+{
+ return wq->fbc.sz_m1 + 1;
+}
+
+u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
+{
+ return (u32)wq->fbc.sz_m1 + 1;
+}
+
+static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride;
+}
+
+static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
+{
+ return mlx5_wq_cyc_get_byte_size(&wq->rq) +
+ mlx5_wq_cyc_get_byte_size(&wq->sq);
+}
+
+static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
+}
+
+static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
+{
+ return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride;
+}
+
+int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_cyc *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
+ int err;
+
+ mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
+ MLX5_GET(wq, wqc, log_wq_sz),
+ fbc);
+ wq->sz = wq->fbc.sz_m1 + 1;
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ fbc->frag_buf = wq_ctrl->buf;
+ wq->db = wq_ctrl->db.db;
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
+ struct mlx5_wq_qp *qp)
+{
+ struct mlx5_frag_buf_ctrl *sq_fbc;
+ struct mlx5_frag_buf *rqb, *sqb;
+
+ rqb = &qp->rq.fbc.frag_buf;
+ *rqb = *buf;
+ rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq);
+ rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
+
+ sq_fbc = &qp->sq.fbc;
+ sqb = &sq_fbc->frag_buf;
+ *sqb = *buf;
+ sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq);
+ sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
+ sqb->frags += rqb->npages; /* first part is for the rq */
+ if (sq_fbc->strides_offset)
+ sqb->frags--;
+}
+
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ u16 sq_strides_offset;
+ u32 rq_pg_remainder;
+ int err;
+
+ mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
+ MLX5_GET(qpc, qpc, log_rq_size),
+ &wq->rq.fbc);
+
+ rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE;
+ sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB;
+
+ mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
+ MLX5_GET(qpc, qpc, log_sq_size),
+ sq_strides_offset,
+ &wq->sq.fbc);
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
+
+ wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
+ wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *cqc, struct mlx5_cqwq *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ int err;
+
+ mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+ &wq_ctrl->buf,
+ param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
+ err);
+ goto err_db_free;
+ }
+
+ wq->fbc.frag_buf = wq_ctrl->buf;
+ wq->db = wq_ctrl->db.db;
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_ll *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
+ struct mlx5_wqe_srq_next_seg *next_seg;
+ int err;
+ int i;
+
+ mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
+ MLX5_GET(wq, wqc, log_wq_sz),
+ fbc);
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ wq->fbc.frag_buf = wq_ctrl->buf;
+ wq->db = wq_ctrl->db.db;
+
+ for (i = 0; i < fbc->sz_m1; i++) {
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ next_seg->next_wqe_index = cpu_to_be16(i + 1);
+ }
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ wq->tail_next = &next_seg->next_wqe_index;
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
+{
+ mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
+ mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
new file mode 100644
index 000000000..b1293d153
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_WQ_H__
+#define __MLX5_WQ_H__
+
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+struct mlx5_wq_param {
+ int buf_numa_node;
+ int db_numa_node;
+};
+
+struct mlx5_wq_ctrl {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_frag_buf buf;
+ struct mlx5_db db;
+};
+
+struct mlx5_wq_cyc {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u16 sz;
+ u16 wqe_ctr;
+ u16 cur_sz;
+};
+
+struct mlx5_wq_qp {
+ struct mlx5_wq_cyc rq;
+ struct mlx5_wq_cyc sq;
+};
+
+struct mlx5_cqwq {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u32 cc; /* consumer counter */
+};
+
+struct mlx5_wq_ll {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ __be16 *tail_next;
+ u16 head;
+ u16 wqe_ctr;
+ u16 cur_sz;
+};
+
+int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_cyc *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
+
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+
+int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *cqc, struct mlx5_cqwq *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
+
+int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_ll *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
+
+void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
+
+static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq)
+{
+ return wq->cur_sz == wq->sz;
+}
+
+static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq)
+{
+ return wq->sz - wq->cur_sz;
+}
+
+static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq)
+{
+ return !wq->cur_sz;
+}
+
+static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq)
+{
+ wq->wqe_ctr++;
+ wq->cur_sz++;
+}
+
+static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n)
+{
+ wq->wqe_ctr += n;
+ wq->cur_sz += n;
+}
+
+static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq)
+{
+ wq->cur_sz--;
+}
+
+static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq)
+{
+ *wq->db = cpu_to_be32(wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
+{
+ return ctr & wq->fbc.sz_m1;
+}
+
+static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz);
+}
+
+static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
+static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
+{
+ int equal = (cc1 == cc2);
+ int smaller = 0x8000 & (cc1 - cc2);
+
+ return !equal && !smaller;
+}
+
+static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr)
+{
+ return ctr & wq->fbc.sz_m1;
+}
+
+static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_ctr2ix(wq, wq->cc);
+}
+
+static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)
+{
+ return ctr >> wq->fbc.log_sz;
+}
+
+static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_get_ctr_wrap_cnt(wq, wq->cc);
+}
+
+static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
+{
+ wq->cc++;
+}
+
+static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq)
+{
+ *wq->db = cpu_to_be32(wq->cc & 0xffffff);
+}
+
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
+{
+ u32 ci = mlx5_cqwq_get_ci(wq);
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+ u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
+ u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
+
+ if (cqe_ownership_bit != sw_ownership_val)
+ return NULL;
+
+ /* ensure cqe content is read after cqe ownership bit */
+ dma_rmb();
+
+ return cqe;
+}
+
+static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
+{
+ return wq->cur_sz == wq->fbc.sz_m1;
+}
+
+static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq)
+{
+ return !wq->cur_sz;
+}
+
+static inline int mlx5_wq_ll_missing(struct mlx5_wq_ll *wq)
+{
+ return wq->fbc.sz_m1 - wq->cur_sz;
+}
+
+static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next)
+{
+ wq->head = head_next;
+ wq->wqe_ctr++;
+ wq->cur_sz++;
+}
+
+static inline void mlx5_wq_ll_pop(struct mlx5_wq_ll *wq, __be16 ix,
+ __be16 *next_tail_next)
+{
+ *wq->tail_next = ix;
+ wq->tail_next = next_tail_next;
+ wq->cur_sz--;
+}
+
+static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq)
+{
+ *wq->db = cpu_to_be32(wq->wqe_ctr);
+}
+
+#endif /* __MLX5_WQ_H__ */